#Input data
M_long = suppressWarnings(data.table::fread('./data/M_long.txt', sep = '\t', header = T)) %>% as.data.frame()
unzip(zipfile = './data/M_wide_all.txt.zip', exdir = "./data")
M_wide_all = suppressWarnings(data.table::fread('./data/M_wide_all.txt', sep = '\t', header = T)) %>% as.data.frame()
M2_all = suppressWarnings(data.table::fread('./data/M2_all.txt', sep = '\t', header = T)) %>% as.data.frame()
M_tmn_wide_st = suppressWarnings(data.table::fread('./data/M_tmn_wide_st.txt', sep = '\t', header = T)) %>% as.data.frame()
M_tmn_h = suppressWarnings(data.table::fread('./data/M_tmn_h.txt', sep = '\t', header = T)) %>% as.data.frame()
M_tmn_long = suppressWarnings(data.table::fread('./data/M_tmn_long.txt', sep = '\t', header = T)) %>% as.data.frame()
P_serial = suppressWarnings(data.table::fread('./data/P_serial.txt', sep = '\t', header = T)) %>% as.data.frame()
T_seer = read_tsv('./data/breast_prop_chemo.txt', col_types = cols())
tmn_inc = read_tsv('./data/CumIncByYear-20190716_KB_Wolff.txt', col_types = cols()) %>%
dplyr::select(Year, yearly_inc) %>% as.data.frame %>% unname %>% as.matrix
mort_inc = read_tsv('./data/overall_mort_50_75.txt', col_types = cols()) %>%
dplyr::select(time, year_inc) %>% as.data.frame %>% unname %>% as.matrix
class_dict <- read.csv("./data/chemoclass_jan2019_revised.csv", stringsAsFactors = F)
drugsets_dict <- read.csv("./data/top_sets_dec2018.csv", header = T, stringsAsFactors = F)
class_dict <- class_dict %>% filter(drugclass_c=="cytotoxic_therapy")
M_wide_all = M_wide_all %>%
mutate(race_b = as.integer(race == "White")
) %>%
mutate(age_scaled = as.vector(scale(age)),
age_d=age/10,
) %>%
mutate(mutnum_all_r = case_when(
mutnum_all ==0 ~ 0,
mutnum_all == 1 ~ 1,
mutnum_all >= 2 ~ 2)
) %>%
mutate(smoke_bin=case_when(
smoke==0 ~ 0,
smoke==1 ~ 1,
smoke==2 ~ 1)
) %>%
mutate(
Gender = relevel(factor(Gender), ref = 'Male'),
race = relevel(factor(race), "White"),
smoke = relevel(factor(smoke), "0"),
smoke_bin = relevel(factor(smoke_bin), "0"),
therapy_binary = relevel(factor(therapy_binary), 'untreated')
)
M_long = M_long %>%
mutate(race_b = as.integer(race == "White")
) %>%
mutate(age_scaled = as.vector(scale(age)),
age_d=age/10,
) %>%
mutate(smoke_bin=case_when(smoke==0 ~ 0,
smoke==1 ~ 1,
smoke==2 ~ 1)
) %>%
mutate(
Gender = relevel(factor(Gender), ref = 'Male'),
race = relevel(factor(race), "White"),
smoke = relevel(factor(smoke), "0"),
smoke_bin = relevel(factor(smoke_bin), "0"),
therapy_binary = relevel(factor(therapy_binary), 'untreated')
)
M_tmn_wide_st <- M_tmn_wide_st %>%
mutate(age_d=age/10)
cbc_vars = c("wbc", "anc", "alc", "amc", "hgb", "mcv", "rdw", "plt")
M_tmn_wide_st = M_tmn_wide_st %>% mutate_at(.funs = funs(c = scale), .vars = cbc_vars)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ddr_genes = c('PPM1D', 'TP53', 'CHEK2')
M2_all <- M2_all %>% mutate(ddr_CH = Gene %in% ddr_genes)
# separate out singletons from serial sampling cohort
M2 = M2_all %>% filter(VAF_1 > 0 & VAF_2 > 0)
M2_single = M2_all %>% filter(VAF_1 == 0 | VAF_2 == 0)
#Define wide data frame with treatment known
M_wide <- M_wide_all %>% filter(therapy_known==1)
#Define long dataframe with non-synonomous and treatment know only
M = M_long %>% filter((therapy_known == 1) & CH_nonsilent == 1)
print('All mutations')
## [1] "All mutations"
nrow(M_long)
## [1] 11076
print('# of Patients')
## [1] "# of Patients"
nrow(M_wide_all)
## [1] 24146
print('# of positive patients')
## [1] "# of positive patients"
unique(M_long$STUDY_ID) %>% length()
## [1] 7216
print('proportion of Patients CH positive')
## [1] "proportion of Patients CH positive"
sum(M_wide_all$CH_all)/nrow(M_wide_all)
## [1] 0.2988487
print('# of CH-PD mutations')
## [1] "# of CH-PD mutations"
sum(M_long$ch_pancan_pd)
## [1] 5810
print('# of CH-my-PD mutations')
## [1] "# of CH-my-PD mutations"
sum(M_long$ch_my_pd)
## [1] 5301
print('table of mutations')
## [1] "table of mutations"
table(M_wide_all$mutnum_all_r)
##
## 0 1 2
## 16930 4952 2264
print('# pts with therapy')
## [1] "# pts with therapy"
table(M_wide$therapy_binary)
##
## untreated treated unknown
## 4160 5978 0
panel_theme = theme_bw() + theme(
panel.border = element_blank(),
legend.position = "none",
panel.grid.minor = element_blank(),
plot.subtitle = element_text(hjust = 0.5, size = 8),
plot.title = element_text(face = 'bold', size = 12, hjust = 0, vjust = -11),
panel.grid.major = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 6),
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 8),
axis.line = element_line(),
plot.margin = unit(c(0,0,0,0), 'pt')
)
age_groups = c("0-10", "11-20", "21-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91-100")
get_ch_grouped = function(M_wide, CI = T) {
CH_by_age_grouped = M_wide %>% select(STUDY_ID, age_cat, CH) %>%
mutate(CH = ifelse(is.na(CH), 0, CH)) %>%
group_by(age_cat) %>%
summarise(CH = sum(CH), total = n()) %>%
filter(!is.na(age_cat)) %>%
mutate(freq = CH / total)
if (CI) {
CH_by_age_grouped = CH_by_age_grouped %>%
cbind(
apply(CH_by_age_grouped, 1, function(row) {
CI = prop.test(row['CH'], row['total'], conf.level=0.95)$conf.int[1:2]
return(c(lower = CI[1], upper = CI[2]))
}) %>% t
)
}
return(CH_by_age_grouped)
}
font_size = 8
age_curve_theme =
theme(
legend.position = 'top',
legend.key.size = unit(5, 'mm'),
legend.title = element_blank(),
legend.direction = 'horizontal',
plot.title = element_text(hjust = -0.08),
axis.text.x = element_text(angle = 45, vjust = 0.5, size = font_size),
axis.text.y = element_text(size = font_size),
axis.title = element_text(size = font_size),
legend.text = element_text(size = font_size)
)
## Histogram by gene frequency
gene_list = M %>% count(Gene) %>% arrange(-n) %>% .$Gene %>% unique %>% .[1:10]
n_treated = M_wide %>% count(therapy_binary) %>% filter(therapy_binary == 'treated') %>% pull(n)
n_untreated = M_wide %>% count(therapy_binary) %>% filter(therapy_binary == 'untreated') %>% pull(n)
# tally
D = M %>%
filter(CH_nonsilent == 1) %>%
reshape2::dcast(
formula = Gene + therapy_binary ~ .,
value.var = 'STUDY_ID',
fun.aggregate = function(STUDY_IDs) {length(unique(STUDY_IDs))}
) %>%
dplyr::rename("n_patient" = ".") %>%
mutate(
prop_patient = case_when(
therapy_binary == 'treated' ~ n_patient/n_treated,
therapy_binary == 'untreated' ~ n_patient/n_untreated
)
) %>%
filter(Gene %in% gene_list) %>%
mutate(
Gene = factor(Gene, gene_list),
therapy_binary = factor(therapy_binary, c('untreated', 'treated'))
) %>%
arrange(Gene)
# need to test for ch_nonsilent, the gene columns in M_wide are ch_pancan_pd
asterisks = lapply(gene_list,
function(gene) {
model = glm(
formula = paste0(gene, ' ~ age_scaled + smoke_bin + race_b + Gender + therapy_binary'),
data = M_wide,
family = "binomial")
treatment_pval = model %>% summary %$% coefficients %>% .['therapy_binarytreated', 'Pr(>|z|)']
treatment_qval = p.adjust(treatment_pval, method = 'fdr', n = length(gene_list))
return(signif.num(treatment_qval, ns = F))
}
)
p_hist = ggplot(
D,
aes(x = Gene, y = prop_patient, fill = therapy_binary)
) +
geom_bar(stat = 'identity', position = "dodge", color = 'black', size = 0.25) +
panel_theme +
theme(
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank(),
legend.key.size = unit(5, 'mm'),
legend.position = 'top',
legend.direction = 'horizontal',
axis.title = element_text(size = font_size),
axis.text.x = element_text(angle = 45, hjust = 1, size = font_size),
legend.text = element_text(size = font_size)
) +
annotate('text', x = gene_list, y = 0.11, label = asterisks, size = 4) +
ylab("Proportion with mutated Gene") +
xlab('') +
scale_fill_manual(values = therapy_colors) +
scale_color_manual(values = therapy_colors)
## stacked bar
mycols = c(
brewer.pal(9, "YlOrBr") %>% rev %>% .[1:3] %>% rev,
brewer.pal(9, "YlOrRd") %>% .[2:5],
brewer.pal(9, "Blues") %>% rev
)
spliceosome_genes = c('SF3B1', 'SRSF2', 'U2AF1')
D = M %>%
mutate(
age_bins = case_when(
age < 40 ~ 1,
age < 65 ~ 2,
age < 75 ~ 3,
age >= 75 ~ 4)
) %>%
group_by(age_bins) %>%
mutate(age_bounds = paste0(round(min(age), 0), '-', round(max(age), 0))) %>%
ungroup() %>%
mutate(age_bins = age_bounds) %>%
mutate(Gene = as.character(Gene)) %>%
group_by(Gene) %>%
mutate(freq = n()) %>%
ungroup() %>%
mutate(
Gene = ifelse(freq > 50 | Gene %in% spliceosome_genes | Gene %in% ddr_genes, Gene, 'Other')
) %>%
arrange(Gene == 'Other', !(Gene %in% spliceosome_genes), !(Gene %in% ddr_genes), freq) %>%
mutate(Gene = factor(Gene, levels = unique(Gene))) %>%
count(age_bins, therapy_binary, Gene) %>%
group_by(age_bins, therapy_binary) %>%
mutate(
total = sum(n),
prop = n/sum(n)
) %>%
ungroup()
p_stack = ggplot(
D,
aes(x = age_bins, y = prop, fill = Gene)
) +
geom_bar(stat = 'identity', size = 0) +
panel_theme +
theme(
legend.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
strip.background = element_rect(fill = 'white', color = 'white'),
axis.text.y = element_text(size = font_size),
axis.text.x = element_text(size = font_size, angle = 45, hjust = 1),
legend.text = element_text(size = font_size/1.5),
legend.key.size = unit(3, "mm"),
axis.title = element_text(size = font_size),
legend.position = 'top'
) +
facet_wrap(~therapy_binary) +
ylab('Proportion of patients') +
xlab('Age') +
scale_fill_manual(values = mycols)
## forest plot
DTA = c('DNMT3A', 'TET2', 'ASXL1')
DDR = c('PPM1D', 'TP53', 'CHEK2')
SPL = c('SF3B1', 'SRSF2')
OTH = c('JAK2', 'ATM')
gene_list = c(DDR, DTA, SPL, OTH)
#ALL adjusted for treatment
logit_gene_var = list()
for (gene in gene_list) {
logit = glm(
formula = get(gene) ~ age_scaled + smoke_bin + race_b + Gender + therapy_binary,
data = M_wide,
family = "binomial")
logit_data = logit %>% sjPlot::get_model_data(type="est") %>% cbind(Gene = gene)
logit_gene_var = rbind(logit_gene_var, logit_data)
}
# for each gene
D = logit_gene_var %>%
filter(!term %in% c("GenderFemale", "race_b")) %>%
mutate(
term = c(
'therapy_binarytreated' = 'Therapy',
'smoke_bin1' = 'Smoking',
'age_scaled' = 'Age'
)[as.character(term)]
) %>%
mutate(term = factor(term, c("Age", "Therapy", "Smoking"))) %>%
mutate(p_fdr = p.adjust(p.value, method = "fdr")) %>%
mutate(termGene = paste0(term, Gene)) %>%
arrange(estimate, Gene) %>%
mutate(termGene = factor(termGene, levels = termGene)) %>%
mutate(gene_cat = case_when(
Gene %in% DTA ~ 'DTA',
Gene %in% DDR ~ 'DDR',
Gene %in% SPL ~ 'Splicing',
T ~ 'Other'
)
) %>%
mutate(gene_cat = factor(gene_cat, c('DDR', 'DTA', 'Splicing', 'Other'))) %>%
mutate(
q.value = p.adjust(p.value, n = nrow(.), method = 'fdr'),
q.label = paste0(signif(estimate, 2), signif.num(q.value)),
q.star = signif.num(q.value)
)
p_forest = plot_forest(
D,
x = "termGene",
label = 'q.star',
eb_w = 0,
eb_s = 0.3,
ps = 1.5,
or_s = 2,
nudge = -0.3,
col = 'gene_cat'
) +
facet_wrap(~term, scale = 'free_y', ncol = 1) +
scale_x_discrete(
breaks = D$termGene,
labels = D$Gene,
expand = c(0.1,0)
) +
xlab('') + ylab('Odds Ratio of CH-PD') +
scale_color_nejm() +
panel_theme +
theme(
axis.text = element_text(size = font_size),
axis.title = element_text(size = font_size),
strip.text = element_text(size = font_size),
legend.position = 'top',
legend.title = element_blank(),
legend.text = element_text(size = font_size/1.2),
legend.key.size = unit(3, "mm")
)
combo = ((p_hist + labs(title = 'A')) / (p_stack + labs(title = 'B'))) | (p_forest+ labs(title = 'C'))
do_plot(combo, "fig1.png", 10, 6, save_pdf = F) #no pdf
#Plotting theme
panel_theme = theme_bw() + theme(
panel.border = element_blank(),
legend.position = "none",
panel.grid.minor = element_blank(),
plot.subtitle = element_text(hjust = 0.5, size = 8),
plot.title = element_text(face = 'bold', hjust = 0, vjust = -2, size = 12),
panel.grid.major = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 6),
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 8),
axis.line = element_line(),
plot.margin = unit(c(0,0,0,0), 'pt')
)
interest_drugs = c("XRT", "ind_platinum",
"ind_topoisomerase_ii_inh", "ind_taxane", "ind_topoisomerase_i_inhi",
"ind_antimetabolite", "ind_microtubule_damaging", "ind_radiotherapy")
interest_genes = c("PPM1D","TP53","CHEK2","DNMT3A","TET2","ASXL1","ATM")
D = lapply(
interest_genes,
function (gene) {
terms = filter_terms(
M_wide %>% filter(get(gene) == 1),
interest_drugs,
verbose = F,
threshold = 10
)
if (length(terms) == 0) {
return(NULL)
}
formula = paste0(
gene, ' ~ ',
paste(terms, collapse = ' + '),
' + XRT + age + smoke_bin + race_b + timedx_impact')
glm(formula = formula, data = M_wide, family = "binomial", na.action = 'na.omit') %>%
sjPlot::get_model_data(type="est") %>% cbind(Gene = gene)
}
) %>%
Reduce(rbind, .) %>%
dplyr::filter(term %in% c(interest_drugs) & Gene %in% interest_genes) %>%
mutate(
term = factor(format_variable(term), format_variable(interest_drugs)),
Gene = factor(Gene, interest_genes)
) %>%
mutate(p_fdr = p.adjust(p.value, method = "fdr")) %>%
rowwise() %>%
mutate(log_or = log(estimate)) %>%
ungroup()
p_heatmap = ggplot(
D,
aes(x = term, y = Gene, fill = log_or)
) +
geom_tile(color = "lightgrey") +
scale_fill_gradient2(
low = "darkblue",
mid = "white",
high = "darkred",
midpoint = 0,
na.value = "white",
limits = c(-2.5, 2.5)
) +
geom_point(
aes(size = ifelse(p_fdr <= 0.05, 1, NA)),
shape = 8,
colour = "black",
show.legend = F
) +
scale_size_area(max_size = 2) +
xlab('') +
ylab('') +
panel_theme +
scale_y_discrete(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_blank(),
legend.position = 'right',
axis.text.x = element_text(size = 8, angle = 30, hjust = 1),
plot.margin = unit(c(20, 0, 20, 0), 'pt'),
legend.text = element_text(size = 4),
legend.title = element_text(size = 4),
legend.key.size = unit(10, "pt")
)
ggsave("./figures/p_heatmap.png", plot = p_heatmap, width = 4, height = 4, dpi = 300)
## Warning: Removed 31 rows containing missing values (geom_point).
knitr::include_graphics("./figures/p_heatmap.png")
## Systemic therapies ##
therapy_labels = c(
"ind_immune_therapy" = "Immune Therapy",
"ind_targeted_therapy" = "Targeted Therapy",
"ind_cytotoxic_therapy" = "Cytotoxic Therapy",
"XRT" = "External Beam Radiation",
"ind_radiotherapy" = "Radionuclide")
n_labels = M_wide %>%
summarise_at(
names(therapy_labels),
term_count
) %>% t %>%
as.data.frame %>%
tibble::rownames_to_column('term') %>%
dplyr::rename(n = V1)
formula = paste0(
'ch_pancan_pd', ' ~ ',
paste(
'timedx_impact',
'ind_cytotoxic_therapy',
'ind_immune_therapy',
'ind_radiotherapy',
'ind_targeted_therapy',
'XRT',
'age',
'smoke_bin',
'race_b', sep = ' + ')
)
summary_systemic = M_wide %>%
glm(
formula = formula,
family = binomial(link="logit"),
na.action = 'na.omit'
) %>%
sjPlot::get_model_data(type = "est",model = .) %>%
left_join(
n_labels,
by = 'term'
) %>%
mutate(term = as.character(term)) %>%
mutate(term = ifelse(term %in% names(therapy_labels), therapy_labels[term], term)) %>%
filter(!str_detect(term, regex("age|smoke_bin1|race_b|timedx_impact"))) %>%
mutate(term = paste0(term, ' (n = ', n, ')')) %>%
mutate(term = format_variable(term)) %>%
arrange(estimate) %>%
mutate(term = factor(term, levels = unique(term)))
## Cytotoxic therapies ##
formula = paste0(
'ch_pancan_pd ~ ',
paste(
'ind_taxane',
'ind_microtubule_damaging',
'ind_antimetabolite',
'ind_alkylating_agent',
'ind_platinum',
'ind_topoisomerase_ii_inh',
'ind_topoisomerase_i_inhi',
'ind_cytotoxic_therapy_ot',
'XRT',
'ind_targeted_therapy',
'ind_immune_therapy',
'age',
'smoke_bin',
'race_b',
'timedx_impact',
sep = ' + '))
drug_list = c(
"ind_taxane",
"ind_microtubule_damaging",
"ind_antimetabolite",
"ind_alkylating_agent",
"ind_platinum",
"ind_topoisomerase_ii_inh",
"ind_topoisomerase_i_inhi")
n_labels = M_wide %>%
summarise_at(
drug_list,
term_count
) %>% t %>%
as.data.frame %>%
tibble::rownames_to_column('term') %>%
dplyr::rename(n = V1)
summary_cytotoxic = M_wide %>%
glm(formula = formula, family = binomial(link = "logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
left_join(
n_labels,
by = 'term'
) %>%
mutate(term = paste0(term, ' (n = ', n, ')')) %>%
filter(str_detect(term, paste(drug_list, collapse = '|'))) %>%
mutate(term = format_variable(term)) %>%
mutate(term = factor(term, levels = unique(term[order(estimate)]))) %>%
filter(std.error < 100)
## Platinum ##
formula = paste0(
'ch_pancan_pd ~ ',
paste(
'ind_taxane',
'ind_microtubule_damaging',
'ind_antimetabolite',
'ind_alkylating_agent',
'ind_carboplatin',
'ind_cisplatin',
'ind_oxaliplatin',
'ind_topoisomerase_ii_inh',
'ind_topoisomerase_i_inhi',
'ind_radiotherapy',
'ind_targeted_therapy',
'ind_immune_therapy',
'XRT',
'age',
'smoke_bin',
'race_b',
'timedx_impact',
sep = ' + '))
drug_list = c('ind_carboplatin','ind_cisplatin','ind_oxaliplatin')
n_labels = M_wide %>%
summarise_at(
drug_list,
term_count
) %>% t %>%
as.data.frame %>%
tibble::rownames_to_column('term') %>%
dplyr::rename(n = V1)
summary_platinum = M_wide %>%
glm(
formula = formula,
family = binomial(link="logit"),
na.action = 'na.omit'
) %>%
sjPlot::get_model_data(type = "est",model = .) %>%
filter(str_detect(term, regex("carboplatin|cisplatin|oxaliplatin", ignore_case = T))) %>%
left_join(
n_labels,
by = 'term'
) %>% mutate(term = paste0(term, ' (n = ', n, ')')) %>%
filter(str_detect(term, paste(drug_list, collapse = '|'))) %>%
mutate(term = as.character(term)) %>%
mutate(term = format_variable(term)) %>%
arrange(estimate) %>%
mutate(term = factor(term, levels = unique(term)))
summary_platinum %>%
plot_forest(
x = "term",
eb_w = 0,
eb_s = 2,
ps = 3,
or_s = 4
) +
xlab('') +
ylab('OR') +
scale_color_manual(values = pal_nejm()(4)[3:4])
## Combined Plot ##
p_forest = rbind(
summary_systemic %>% mutate(group="Systemic Therapy"),
summary_cytotoxic %>% mutate(group="Cytotoxic Therapy"),
summary_platinum %>% mutate(group="Platinum")
) %>%
mutate(group = factor(group, c('Systemic Therapy', 'Cytotoxic Therapy', 'Platinum'))) %>%
arrange(group, estimate) %>%
mutate(term = factor(term, levels = unique(term))) %>%
plot_forest(
x = "term",
eb_w = 0,
eb_s = 1.5,
label="p.stars",
ps = 2,
or_s = 3) +
facet_wrap(~group, scales = "free_y", ncol = 1) +
panel_theme +
# theme_classic() +
theme(
strip.placement = 'bottom',
strip.text = element_text(size = 8),
plot.margin = unit(c(0,10,0,0), 'pt'),
axis.text.y = element_text(size = 6)
) +
scale_fill_nejm() +
scale_color_nejm() +
ylab('Odds Ratio of CH-PD') +
xlab('') +
scale_x_discrete(expand = c(0.25,0))
g_forest = ggplot_build(p_forest)
gt = ggplot_gtable(g_forest)
panels = gt$layout$t[grepl("panel", gt$layout$name)]
vtiles = c(5,7,3)
gt$heights[panels] <- unit(vtiles, "null")
p_forest = as.ggplot(gt)
p_forest
ggsave("./figures/p_forest.png", plot = p_forest, width = 4, height = 4, dpi = 300)
knitr::include_graphics("./figures/p_forest.png")
formula = paste0(
'ch_pancan_pd ~ ',
paste(
'pct_taxane',
'pct_microtubule_damaging',
'pct_antimetabolite',
'pct_alkylating_agent',
'pct_platinum',
'pct_topoisomerase_ii_inh',
'pct_topoisomerase_i_inhi',
'pct_cytotoxic_therapy_ot',
'pct_targeted_therapy',
'pct_immune_therapy',
'eqd_3_t',
'age',
'smoke_bin',
'race_b',
'timedx_impact',
sep = ' + '))
# p vals for trend
pvals = c(
'pct_platinum' = M_wide %>%
filter(pct_platinum >= 1) %>%
glm(formula = formula,
family = binomial(link = "logit"),
na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
filter(term == 'pct_platinum') %$%
p.value,
'pct_topoisomerase_ii_inh' = M_wide %>%
filter(pct_topoisomerase_ii_inh >= 1) %>%
glm(formula = formula, family = binomial(link = "logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
filter(term == 'pct_topoisomerase_ii_inh') %$%
p.value,
'eqd_3_t' = M_wide %>%
filter(eqd_3_t >= 1) %>%
glm(formula = formula, family = binomial(link = "logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
filter(term == 'eqd_3_t') %$%
p.value
)
summary_class_b = M_wide %>%
mutate_at(
c('pct_taxane', 'pct_microtubule_damaging', 'pct_antimetabolite',
'pct_alkylating_agent', 'pct_platinum', 'pct_topoisomerase_ii_inh',
'pct_topoisomerase_i_inhi', 'eqd_3_t'),
factor
) %>%
glm(formula = formula, family = binomial(link="logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
dplyr::rename(Drug = term) %>%
filter(str_detect(Drug, 'pct_topoisomerase_ii_inh|pct_platinum|eqd_3_t')) %>%
mutate(
Dosage = str_extract(Drug, '[0-9]$'),
Drug = str_replace(Drug, '[0-9]$', '')
) %>%
mutate(
Dosage = factor(Dosage, levels = sort(unique(Dosage), decreasing = F))
) %>%
mutate(
Drug = ifelse(
Drug %in% names(pvals),
paste0(Drug, '\n(p = ', signif(pvals[Drug], 2), ')'),
Drug)
) %>%
mutate(Drug = format_variable(Drug))
## Platinum ##
formula = paste0(
'ch_pancan_pd ~ ',
paste(
'pct_taxane',
'pct_microtubule_damaging',
'pct_antimetabolite',
'pct_alkylating_agent',
'pct_carboplatin',
'pct_cisplatin',
'pct_oxaliplatin',
'pct_topoisomerase_ii_inh',
'pct_topoisomerase_i_inhi',
'pct_cytotoxic_therapy_ot',
'pct_targeted_therapy',
'pct_immune_therapy',
'eqd_3_t',
'age',
'smoke_bin',
'race_b',
'timedx_impact',
sep = ' + '))
drug_list = c('pct_carboplatin', 'pct_cisplatin', 'pct_oxaliplatin')
# p values for trend
pvals = sapply(
drug_list,
function(drug) {
pval = M_wide %>% filter(!!drug > 0) %>%
glm(formula = formula, family = binomial(link = "logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
filter(term == drug) %$%
p.value %>%
signif(2)
}
)
summary_platinum = M_wide %>%
mutate_at(
drug_list,
factor
) %>%
glm(formula = formula, family = binomial(link = "logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
dplyr::rename(Drug = term) %>%
mutate(
Dosage = str_extract(Drug, '[0-9]'),
Drug = str_replace(Drug, '[0-9]', '')
) %>%
mutate(Dosage = factor(Dosage, levels = sort(unique(Dosage), decreasing = T))) %>%
filter(Drug %in% drug_list) %>%
mutate(
Drug = ifelse(
Drug %in% names(pvals),
paste0(Drug, '\n(p = ', signif(pvals[Drug], 2), ')'),
Drug)
) %>%
mutate(Drug = format_variable(Drug)) %>%
mutate(Drug = factor(Drug, levels = unique(Drug)))
p_dose = ggplot(
rbind(summary_class_b, summary_platinum) %>%
mutate(Drug = factor(Drug, unique(Drug))),
aes(x = Dosage, y = estimate, group = 1)
) +
geom_point() +
geom_line() +
ylab("OR of CH-PD") +
scale_x_discrete(expand = c(0.1,0)) +
xlab("Tertile of Cumulative Therapy Dose") +
geom_ribbon(aes(ymin=conf.low, ymax=conf.high, x=Dosage, fill = ""), alpha = 0.3) +
facet_wrap(~Drug, nrow = 2, scale = 'free') +
theme_classic() +
panel_theme +
theme(
legend.position = 'none'
)
ggsave("./figures/p_dose.png", plot = p_dose, width = 4, height = 2, dpi = 300)
knitr::include_graphics("./figures/p_dose.png")
panel = (p_forest + labs(title = 'a') | ((p_dose + labs(title = 'b'))/ (p_heatmap + labs(title = 'c')) + plot_layout(heights = c(3,5)))) + plot_layout(widths = c(3, 2))
# panel
do_plot(panel, "fig2.png", 10, 6, save_pdf = F) #no pdf
## Warning: Removed 31 rows containing missing values (geom_point).
# delta vaf significance
M2['pval_T1_vs_T2'] <-
apply(M2, 1, function(mut) {
refs_1 = as.integer(mut['refs_1'])
alts_1 = as.integer(mut['alts_1'])
refs_2 = as.integer(mut['refs_2'])
alts_2 = as.integer(mut['alts_2'])
# if 0 in Serial, then CI def don't overlap
if (refs_2 + alts_2 == 0 | refs_1 + alts_1 == 0) {
p = 0
} else if (alts_1 < 15 | alts_2 < 15) {
p = data.frame(
x = c(alts_1, alts_2),
n = c(refs_1 + alts_1, refs_2 + alts_2),
row.names = c('N', 'T')
) %>% fisher.test %>% .$p.value
} else {
p = prop.test(c(alts_1, alts_2), c(refs_1 + alts_1, refs_2 + alts_2),
conf.level = 0.95, correct = FALSE)$p.value
}
return(p)
})
# grouping based on p value
M2 = M2 %>% mutate(direction = ifelse(pval_T1_vs_T2 < 0.05, ifelse(VAF_2 > VAF_1, 'UP', 'DOWN'), 'STABLE'))
font_size = 8
panel_theme = theme_bw() + theme(
panel.border = element_blank(),
legend.position = "none",
panel.grid.minor = element_blank(),
plot.subtitle = element_text(hjust = 0.5, size = font_size),
plot.title = element_text(face = 'bold', size = 12, hjust = 0, vjust = -5),
axis.title = element_text(size = font_size),
panel.grid.major = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = font_size),
axis.text.y = element_text(size = font_size),
axis.line = element_line(),
plot.margin = unit(c(0,0,0,0), 'pt')
)
therapy_colors = c('#D55E00', '#0072B2')
## Other vs DDR
D = M2 %>%
filter(ddr_CH) %>%
melt(
measure.vars = c('XRT1', 'ind_cytotoxic_therapy1'),
variable.name = 'treatment_type',
value.name = 'treatment'
) %>%
mutate(treatment_type = c('XRT1' = 'XRT', 'ind_cytotoxic_therapy1' = 'Cytotoxic')[treatment_type]) %>%
mutate(treatment = recode(treatment, `1` = 'treated', `0` = 'untreated')) %>%
mutate(treatment = factor(treatment, levels = c('treated', 'untreated')))
jitter = geom_jitter(
pch = 21,
size = 1,
alpha = 0.8,
width = 0.1,
color = 'black',
fill = 'grey'
)
p_ddr_CH = ggplot(
D,
aes(x = treatment, y = log(growth_rate), fill = treatment)) +
geom_boxplot(outlier.alpha = 0, color = 'black') +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("") +
ylab('log growth rate') +
jitter +
facet_wrap(~treatment_type) +
panel_theme +
theme(
axis.text.x = element_blank()
) +
scale_y_continuous(
expand = c(0.1, 0),
limits = c(-0.006, 0.006)
) +
stat_compare_means(
aes(label = paste0("p = ", ..p.format..)),
method = 't.test',
comparisons = list(c('treated', 'untreated')),
size = 2.5
) +
labs(subtitle = 'DDR') +
scale_fill_manual(values = therapy_colors)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
D = M2 %>%
filter(!ddr_CH) %>%
melt(
measure.vars = c('XRT1', 'ind_cytotoxic_therapy1'),
variable.name = 'treatment_type',
value.name = 'treatment'
) %>%
mutate(treatment_type = c('XRT1' = 'XRT', 'ind_cytotoxic_therapy1' = 'Cytotoxic')[treatment_type]) %>%
mutate(treatment = recode(treatment, `1` = 'treated', `0` = 'untreated')) %>%
mutate(treatment = factor(treatment, levels = c('treated', 'untreated')))
p_other = ggplot(
D,
aes(x = treatment, y = log(growth_rate), fill = treatment)) +
geom_boxplot(outlier.alpha = 0, color = 'black') +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("") +
ylab('') +
jitter +
facet_wrap(~treatment_type) +
panel_theme +
theme(
axis.text.x = element_blank()
) +
scale_y_continuous(
expand = c(0.1, 0),
limits = c(-0.006, 0.006)
) +
stat_compare_means(
aes(label = paste0("p = ", ..p.format..)),
method = 't.test',
comparisons = list(c('treated', 'untreated')),
size = 2.5
) +
labs(subtitle = 'Other') +
scale_fill_manual(values = therapy_colors)
## Clonal competition
D = M2 %>%
group_by(STUDY_ID) %>%
filter(any(ddr_CH) & any(!ddr_CH)) %>%
ungroup() %>%
group_by(STUDY_ID, ddr_CH) %>%
summarise(
max_log_alpha = log(max(growth_rate)),
therapy_binary = unique(therapy_binary),
Gene = ifelse(any(ddr_CH), Gene[1], 'other')
) %>%
mutate(therapy_binary = ifelse(therapy_binary == 1, 'treated', 'untreated')) %>%
mutate(therapy_binary = factor(therapy_binary, c('treated', 'untreated'))) %>%
mutate(ddr_CH = ifelse(ddr_CH, 'DDR', 'Other')) %>%
mutate(ddr_CH = factor(ddr_CH, c('DDR', 'Other'))) %>%
ungroup()
p_tr = D %>%
filter(therapy_binary == 'treated') %>%
dcast(STUDY_ID ~ ddr_CH, value.var = 'max_log_alpha') %>%
{t.test(.[['DDR']], .[['Other']], paired = TRUE, alternatie = 'two.sided')$p.value}
p_untr = D %>%
filter(therapy_binary == 'untreated') %>%
dcast(STUDY_ID ~ ddr_CH, value.var = 'max_log_alpha') %>%
{t.test(.[['DDR']], .[['Other']], paired = TRUE, alternatie = 'two.sided')$p.value}
D = D %>%
arrange(therapy_binary) %>%
mutate(
therapy_binary = ifelse(
therapy_binary == 'treated',
paste0(therapy_binary, ' (n = ', sum(D$therapy_binary == 'treated')/2, ')', '\n', 'p = ', format(signif(p_tr, 2), scientific = T)),
paste0(therapy_binary, ' (n = ', sum(D$therapy_binary == 'untreated')/2, ')', '\n', 'p = ', format(signif(p_untr, 2), scientific = T))
)
) %>%
mutate(therapy_binary = factor(therapy_binary, unique(therapy_binary)))
p_comp = ggplot(
D,
aes(x = ddr_CH, y = max_log_alpha, group = STUDY_ID, fill = therapy_binary, color = therapy_binary)
) +
geom_line(alpha = 0.7) +
geom_point(
color = 'black', pch = 21, fill = 'grey'
) +
panel_theme +
xlab('') +
ylab('log growth rate') +
scale_fill_manual(values = therapy_colors) +
scale_color_manual(values = therapy_colors) +
facet_wrap(~therapy_binary, ncol = 2)
## Single genes
dta_genes = c('DNMT3A', 'TET2', 'ASXL1')
ddr_genes = c('PPM1D', 'TP53', 'CHEK2')
gene_list = c(ddr_genes, dta_genes)
D = M2 %>%
filter(Gene %in% gene_list) %>%
mutate(
therapy_binary = recode(therapy_binary, `1` = 'treated', `0` = 'untreated')
) %>%
mutate(therapy_binary = factor(therapy_binary, c('treated', 'untreated'))) %>%
mutate(Gene = factor(Gene, gene_list)) %>%
mutate(gene_cat = ifelse(Gene %in% dta_genes, 'DTA', 'DDR'))
tests = D %>%
filter(Gene != 'CHEK2') %>%
group_by(Gene) %>%
summarise(
p.val = t.test(
log(growth_rate[therapy_binary == 'treated']),
log(growth_rate[therapy_binary == 'untreated'])
)$p.value
) %>%
rowwise() %>%
mutate(q.val = p.adjust(p.val, method = 'fdr', n = nrow(.)))
D = D %>% left_join(tests, by = 'Gene') %>%
arrange(Gene) %>%
mutate(gene_label = paste0(Gene, '\nq = ', signif(q.val,2))) %>%
mutate(gene_label = factor(gene_label, unique(gene_label)))
p_genes = ggplot(
D,
aes(x = therapy_binary, y = log(growth_rate), fill = therapy_binary)
) +
geom_boxplot(outlier.alpha = 0, color = 'black') +
facet_wrap(~gene_label, nrow = 1) +
geom_jitter(
pch = 21,
size = 1,
alpha = 0.8,
width = 0.1,
color = 'black',
fill = 'grey'
) +
ylab('log growth rate') +
xlab('') +
panel_theme +
scale_fill_manual(values = therapy_colors) +
theme(
legend.title = element_blank(),
# axis.text.x = element_text(angle = 15, size = font_size/1.2, hjust = 0.5),
axis.text.x = element_blank(),
legend.position = 'right'
)
## Dosage
# p values
get_p = function(M, indi) {
M %>%
filter(get(indi) >= 1) %>%
filter(!is.na(age_1)) %>%
geepack::geeglm(
data = .,
formula = as.formula(paste0('log(growth_rate) ~ ', indi, ' + age_1 + Gender + smoke_bin')),
id = STUDY_ID,
corstr = "exchangeable"
) %>%
summary %>% .$coefficients %>%
as.data.frame %>%
tibble::rownames_to_column('factor') %>%
filter(factor == indi) %>%
pull('Pr(>|W|)') %>%
signif(2) %>%
format(scientific = T)
}
pval_ddr_CH_xrt = M2 %>% filter(ddr_CH) %>% get_p('eqd_3_t1')
pval_other_xrt = M2 %>% filter(!ddr_CH) %>% get_p('eqd_3_t1')
pval_ddr_CH_cyto = M2 %>% filter(ddr_CH) %>% get_p('pct_cytotoxic_therapy1')
pval_other_cyto = M2 %>% filter(!ddr_CH) %>% get_p( 'pct_cytotoxic_therapy1')
pct_continuous <- function(M, title = '') {
Mr = M %>% group_by(pct, treatment_type) %>%
summarise(
mean_log_alpha = mean(log(growth_rate)),
lower = quantile(log(growth_rate), 0.25),
upper = quantile(log(growth_rate), 0.75)
)
p = ggplot(
M,
aes(x = pct, y = log(growth_rate))
) +
geom_ribbon(
data = Mr,
inherit.aes = F,
aes(x = pct,
ymin = lower,
ymax = upper,
group = ''
),
fill = pal_nejm()(8)[6],
alpha = 0.6
) +
geom_line(
data = Mr,
inherit.aes = F,
aes(x = pct,
y = mean_log_alpha,
group = ''
),
color = pal_nejm()(8)[6]
) +
facet_wrap(~treatment_type, nrow = 1) +
labs(subtitle = title) +
panel_theme +
theme(axis.title.x.top = element_text(size = 4)
) +
xlab('dosage tertile') +
ylab('log growth rate') +
geom_jitter(
pch = 21,
width = 0.1,
size = 1,
alpha = 1,
color = 'black',
fill = 'grey'
) +
scale_fill_nejm()
return(p)
}
D = M2 %>%
melt(
measure.vars = c('eqd_3_t1', 'pct_cytotoxic_therapy1'),
variable.name = 'treatment_type',
value.name = 'pct'
) %>%
filter(!is.na(pct)) %>%
mutate(pct = factor(as.integer(pct))) %>%
mutate(treatment_type = c('eqd_3_t1' = 'XRT', 'pct_cytotoxic_therapy1' = 'Cytotoxic')[treatment_type])
p_dose_ddr = pct_continuous(
D %>% filter(ddr_CH) %>%
mutate(treatment_type = paste0(treatment_type, '\n(p = ', c('Cytotoxic' = pval_ddr_CH_cyto, 'XRT' = pval_ddr_CH_xrt)[treatment_type], ')')),
'DDR'
) + ylim(-0.006, 0.006)
p_dose_other = pct_continuous(
D %>% filter(!ddr_CH) %>%
mutate(treatment_type = paste0(treatment_type, '\n(p = ', c('Cytotoxic' = pval_other_cyto, 'XRT' = pval_other_xrt)[treatment_type], ')')),
'Other'
) + ylab('') + ylim(-0.006, 0.006)
dat = M2
direction_colors = c('firebrick3', 'grey45', 'lightskyblue') %>%
setNames(c('UP', 'STABLE', 'DOWN')) %>%
scale_color_manual(name = "status", values = .)
tx_categories = c("Untreated", "Targeted or\nImmune Therapy", "Cytotoxic Therapy", "XRT")
dat$tx_category = ifelse(dat$ind_anychemo1==0 & dat$XRT1==0, tx_categories[1],
ifelse((dat$ind_targeted_therapy1==1 | dat$ind_immune_therapy1==1) &
dat$ind_cytotoxic_therapy1==0 & dat$XRT1==0,
tx_categories[2],
ifelse(dat$ind_cytotoxic_therapy1==1, tx_categories[3],
ifelse(dat$XRT1 == 1 , tx_categories[4],
"OTHER"))))
dat <- dat %>% mutate(changeT2T1 = VAF_2-VAF_1)
dat <- dat %>% mutate(delta_per_year = changeT2T1 / (delta_time/365))
dat_delta_avg <- dat %>% filter(tx_category != "OTHER") %>% group_by(tx_category) %>%
summarise(mean_delta_per_year = mean(delta_per_year),
mean_delta_percent = round(100 * mean_delta_per_year, 2),
delta_time = 365)
p_spider = dat %>%
filter(tx_category != "OTHER") %>%
mutate(tx_category = factor(tx_category, tx_categories)) %>%
ggplot(
aes(x = delta_time, y = changeT2T1, group = paste0(STUDY_ID, variant_id), color = direction)
) +
geom_segment(
data = . %>% filter(direction == 'STABLE'),
aes(x = delta_time, y = changeT2T1, xend = 0, yend = 0, color = direction),
size = 0.2,
alpha = 0.5
) +
geom_point(
data = . %>% filter(direction == 'STABLE'),
size = 0.2,
alpha = 0.8
) +
geom_segment(
data = . %>% filter(direction != 'STABLE'),
aes(x = delta_time, y = changeT2T1, xend = 0, yend = 0, color = direction),
size = 0.2,
alpha = 0.5
) +
geom_point(
data = . %>% filter(direction != 'STABLE'),
size = 0.2,
alpha = 0.8
) +
theme_classic() +
labs(
x = "Days between Blood Draw",
y = "Change in VAF"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position="top"
) +
facet_wrap(~tx_category, ncol = 4, nrow = 1) +
panel_theme +
theme(
legend.title = element_blank(),
legend.position = 'right'
) +
direction_colors
panel = (p_spider + labs(title = 'A')) /
((p_ddr_CH + labs(title = 'B'))| p_other) /
(p_genes + labs(title = 'C')) /
((p_dose_ddr + labs(title = 'D'))| p_dose_other) /
(p_comp + labs(title = 'E'))
do_plot(panel, "figure3.png", 6.5, 11, save_pdf = F) #no pdf
#CH overall
M_tmn_wide_st <- M_tmn_wide_st %>% mutate(center =fct_relevel(center, "MSK","MDA","MOF","HCC"))
cox <- coxph(Surv(timelastfu, post_tmn) ~ ch_my_pd + age_d + Gender + strata(center), data= M_tmn_wide_st)
cox
## Call:
## coxph(formula = Surv(timelastfu, post_tmn) ~ ch_my_pd + age_d +
## Gender + strata(center), data = M_tmn_wide_st)
##
## coef exp(coef) se(coef) z p
## ch_my_pd 1.78922 5.98481 0.25235 7.090 1.34e-12
## age_d -0.12970 0.87836 0.09867 -1.314 0.1887
## GenderM 0.57438 1.77602 0.25160 2.283 0.0224
##
## Likelihood ratio test=52.32 on 3 df, p=2.565e-11
## n= 9437, number of events= 75
cox <- coxph(Surv(timelastfu, post_tmn) ~ ch_my_pd*center + age_d + Gender + strata(center), data= M_tmn_wide_st)
cox
## Call:
## coxph(formula = Surv(timelastfu, post_tmn) ~ ch_my_pd * center +
## age_d + Gender + strata(center), data = M_tmn_wide_st)
##
## coef exp(coef) se(coef) z p
## ch_my_pd 1.9089 6.7455 0.3911 4.880 1.06e-06
## centerMDA NA NA 0.0000 NA NA
## centerMOF NA NA 0.0000 NA NA
## centerHCC NA NA 0.0000 NA NA
## age_d -0.1306 0.8776 0.1030 -1.268 0.2049
## GenderM 0.5357 1.7087 0.2537 2.112 0.0347
## ch_my_pd:centerMDA 0.6278 1.8735 0.7253 0.866 0.3867
## ch_my_pd:centerMOF -0.9976 0.3688 0.6810 -1.465 0.1430
## ch_my_pd:centerHCC -0.1814 0.8341 0.6186 -0.293 0.7694
##
## Likelihood ratio test=56.42 on 6 df, p=2.396e-10
## n= 9437, number of events= 75
#Mutation number and max VAF
mut_var <- c("n_mut_r","VAF_nonsilent_r")
cox_data <- list()
cox_mut_var <- list()
for (var in mut_var) {
cox <- coxph(Surv(timelastfu, post_tmn) ~ as.factor(get(var))+ age + Gender + strata(center), data= M_tmn_wide_st)
cox_data <- cox %>% get_model_data(type="est")
cox_data <- cox_data %>% cbind(mut_var = var)
cox_mut_var <- rbind(cox_mut_var, cox_data)
}
cox_mut_var_stonly<- cox_mut_var %>% filter(term!="age" & term!="GenderM")
#Gene Effects
top_genes_cases <- M_tmn_long %>%
filter(VAF_1 >= 0.02 & center!="PMC" & center!="WSU" & post_tmn==1) %>%
group_by(Gene) %>%
summarise(count=n_distinct(center_id)) %>%
arrange(-count) %>%
filter(count>=2) %>%
pull(Gene) %>%
as.character()
top_genes_controls <- M_tmn_long %>%
filter(VAF_1 >= 0.02 & center!="PMC" & center!="WSU") %>%
group_by(Gene) %>%
summarise(count=n_distinct(center_id)) %>%
arrange(-count) %>%
filter(count>=20) %>%
pull(Gene) %>%
as.character()
top_genes <- top_genes_cases %>% intersect(top_genes_controls)
cox_gene_var <- list()
for (gene in top_genes) {
cox <- coxph(Surv(timelastfu, post_tmn) ~ get(gene) + age + Gender + strata(center), data= M_tmn_wide_st, na.action=na.omit)
cox_data <- cox %>% get_model_data(type="est")
cox_data <- cox_data %>% cbind(Gene = gene)
cox_gene_var <- rbind(cox_gene_var, cox_data)
}
#CBC parameters
cbc_var <- c("hgb_c","mcv_c","rdw_c","wbc_c","hgb_c","anc_c","amc_c","plt_c")
cox_cbc_var <- list()
for (var in cbc_var) {
cox <- coxph(Surv(timelastfu, post_tmn) ~ get(var)+ age + Gender + strata(center), data= M_tmn_wide_st , na.action=na.omit)
cox_data <- cox %>% get_model_data(type="est")
cox_data <- cox_data %>% cbind(CBC_var = var)
cox_cbc_var <- rbind(cox_cbc_var, cox_data)
}
cbc = cox_cbc_var %>%
filter(CBC_var %in% cbc_var) %>%
filter (term=="get(var)") %>%
mutate(CBC = case_when(
CBC_var == 'wbc_c' ~ "White Blood Cell Count",
CBC_var == 'anc_c' ~ "Neutrophil Count",
CBC_var == 'amc_c' ~ "Monocyte Count",
CBC_var == 'hgb_c' ~ "Hemoglobin",
CBC_var == 'mcv_c' ~ "Mean Corpuscular Volume",
CBC_var == 'rdw_c' ~ "Red Cell Distribution Width",
CBC_var == 'plt_c' ~ "Platelets"
))
#One forest plot for all
cox_gene_var_v2 <- cox_gene_var %>%
filter(term=="get(gene)") %>%
mutate(group="Gene") %>%
mutate(mut_var= factor(Gene, levels=unique(Gene[order(estimate)])))
cbc_v2 <- cbc %>% mutate(mut_var=CBC) %>% mutate(group="Blood Count Index")
cox_gene_var_v2 <- select(cox_gene_var_v2,-c(Gene))
cbc_v2 <- select(cbc_v2,-c(CBC_var,CBC)) %>% mutate(mut_var= factor(mut_var, levels=unique(mut_var[order(estimate)])))
cox_mut_var_stonly_v2 <- cox_mut_var_stonly %>%
mutate(new_var=case_when(
term=="as.factor(get(var))1" & mut_var=="n_mut_r" ~ "1",
term=="as.factor(get(var))2" & mut_var=="n_mut_r" ~ "2",
term=="as.factor(get(var))3" & mut_var=="n_mut_r" ~ "3 or more"
))
cox_mut_var_stonly_v2 <- cox_mut_var_stonly_v2 %>%
mutate(new_var=case_when(
term=="as.factor(get(var))1" & mut_var=="VAF_nonsilent_r" ~ "2-5%",
term=="as.factor(get(var))2" & mut_var=="VAF_nonsilent_r" ~ "5-10%",
term=="as.factor(get(var))3" & mut_var=="VAF_nonsilent_r" ~ "10-20%",
term=="as.factor(get(var))4" & mut_var=="VAF_nonsilent_r" ~ "20% or more",
TRUE ~ new_var
))
cox_mut_var_stonly_v2 <- cox_mut_var_stonly_v2 %>%
mutate(group=case_when(
mut_var=="n_mut_r" ~ "Mutation Number",
mut_var=="VAF_nonsilent_r" ~ "Maximum VAF"))
cox_mut_var_stonly_v2 <- cox_mut_var_stonly_v2 %>% select(-c("mut_var"))
cox_mut_var_stonly_v2 <- cox_mut_var_stonly_v2 %>% rename(mut_var=new_var)
combo <- rbind(cox_gene_var_v2, cbc_v2, cox_mut_var_stonly_v2)
uni_plot = plot_forest(
combo,
x = "mut_var",
label = 'p.stars',
eb_w = 0,
eb_s = 0.3,
ps = 1.5,
or_s = 2,
nudge = -0.3) +
# col = 'group'
facet_grid(group ~ ., scale = 'free_y', space = "free_y") +
scale_x_discrete(
breaks = combo$mut_var,
labels = combo$mut_var,
expand = c(0.2,0)
) +
xlab('') + ylab('Hazard Ratio of tMN') +
scale_color_nejm() +
panel_theme
do_plot(uni_plot, "figure4a.png", w = 3, h = 4)
#Dont include PPM1D and SRSF2 b/c missing in some
gene_vars = c("TET2","SF3B1","TP53","U2AF1")
mut_vars = c("VAF_nonsilent_r","n_mut_r")
cbc_vars = c("hgb_c","rdw_c", "mcv_c", "wbc_c","anc_c", "plt_c")
demo_vars = c('age', 'Gender','strata(center)')
#include mean imputation for blood counts when missing
D = M_tmn_wide_st %>%
filter(!is.na(age)) %>%
filter(center != 'PMC') %>%
mutate_at(vars(cbc_vars),.funs = funs(ifelse(is.na(.), mean(., na.rm=TRUE), .)))
form = paste0(
'Surv(timelastfu, post_tmn) ~ ',
paste(
c(
gene_vars,
mut_vars,
cbc_vars,
demo_vars
),
collapse = ' + '
)
)
model = coxph(formula = as.formula(form), data = D, na.action=na.omit) %>% summary
betas = model %>%
.$coefficients %>% as.data.frame %>%
tibble::rownames_to_column('term') %>%
filter(!(term %in% c(
'pre_any_therapy',
'GenderM')
)
) %>% select(term, coef)
# add beta of therapy from external source
betas = betas %>% rbind(data.frame(term = 'chemo', coef = log(6.8)))
# prepare covariate matrix
Z = left_join(
#add average chemo
M_tmn_wide_st %>% mutate(
age_bin = case_when(
age < 20 ~ '<20',
age >= 20 & age < 25 ~ '20-24',
age >= 25 & age < 30 ~ '25-29',
age >= 30 & age < 35 ~ '30-34',
age >= 35 & age < 40 ~ '35-39',
age >= 40 & age < 45 ~ '40-44',
age >= 45 & age < 50 ~ '45-49',
age >= 50 & age < 55 ~ '50-54',
age >= 55 & age < 60 ~ '55-59',
age >= 60 & age < 65 ~ '60-64',
age >= 65 & age < 70 ~ '65-69',
age >= 70 & age < 75 ~ '70-74',
age >= 75 & age < 80 ~ '75-79',
age >= 80 & age < 85 ~ '80-84',
age >= 85 ~ '85+'
)
),
T_seer %>%
rename(age_bin = age) %>%
mutate(age_bin = str_remove(age_bin, ' years')),
by = 'age_bin'
) %>%
filter(age > 50 & age < 75) %>%
filter(pre_any_therapy == 0) %>%
filter(center == 'MSK') %>%
select(betas$term, post_tmn) %>%
filter(complete.cases(.))
y = Z %>% pull(post_tmn)
Z = Z %>% select(-post_tmn)
# info for the covariates
Z_info = lapply(betas$term, function(x){list(name = x, type = 'continuous')})
## formula
formula = paste0(
'observed.outcome ~ ',
paste(
betas$term,
collapse = '+',
sep = '+'
)
)
# modified chemoxrt column
Z_tr = Z %>% mutate(chemo = 1)
Z_untr = Z %>% mutate(chemo = 0)
## build model
res_tr = iCARE::computeAbsoluteRisk(
model.formula = as.formula(formula),
model.cov.info = Z_info,
model.ref.dataset = Z,
model.log.RR = as.matrix(unname(tibble::column_to_rownames(betas, 'term'))),
model.disease.incidence.rates = tmn_inc,
model.competing.incidence.rates = mort_inc,
apply.age.start = 1,
apply.age.interval.length = 9,
apply.cov.profile = Z_tr,
return.refs.risk = TRUE
)
res_untr = iCARE::computeAbsoluteRisk(
model.formula = as.formula(formula),
model.cov.info = Z_info,
model.ref.dataset = Z,
model.log.RR = as.matrix(unname(tibble::column_to_rownames(betas, 'term'))),
model.disease.incidence.rates = tmn_inc,
model.competing.incidence.rates = mort_inc,
apply.age.start = 1,
apply.age.interval.length = 9,
apply.cov.profile = Z_untr,
return.refs.risk = TRUE
)
#Figure 4b
D = data.frame(
risk_ref = as.vector(res_tr$refs.risk),
risk_tr = as.vector(res_tr$risk),
risk_untr = as.vector(res_untr$risk),
Z
) %>%
mutate(
risk_untr_pct = ntile(risk_untr, 100)
)
p_dist <- D %>% ggplot(
aes(x = risk_ref)
) +
theme_classic() +
geom_density(alpha = 0.5,color = "black", fill = "gray") +
xlim(0.0,0.022) +
ylim(0.0, 400) +
theme_classic() +
geom_density(alpha = 0.5) +
xlim(0,0.022) +
scale_fill_nejm() +
ylab('Number of Women') +
xlab("10-year absolute risk of AML/MDS for women with breast cancer in the U.S aged 50-75")
do_plot(p_dist, "figure4b.svg", w = 6, h = 2.5)
#Calculate number of women with absolute risk of more than 1%
morethan1 <- D %>% filter(risk_ref>0.01) %>% count()
total <- D %>% count()
(total-morethan1)/total
#Figure 4c
D = data.frame(
risk_ref = as.vector(res_tr$refs.risk),
risk_tr = as.vector(res_tr$risk),
risk_untr = as.vector(res_untr$risk)
) %>%
mutate(
risk_untr_pct = ntile(risk_untr, 100)
)%>%
melt(
measure.vars = c('risk_tr', 'risk_untr', 'risk_ref'),
value.name = 'risk',
variable.name = 'treatment'
)
therapy_colors = c("#D55E00","#0072B2")
p <- ggplot(D %>% filter(treatment %in% c('risk_tr', 'risk_untr')) %>%
filter(risk_untr_pct >= 97),
aes(x = as.factor(risk_untr_pct),
y = risk,
fill = treatment
)
) +
theme_bw() +
scale_y_continuous(position = "left", limits = c(0,0.2)) +
geom_boxplot(
position = 'dodge',
outlier.alpha = 0,
alpha = 0.5,
size = 0.4
) +
scale_fill_nejm() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_line(linetype = 'dashed'),
legend.position = 'right'
) +
xlab('Absolute Risk Percentile') +
ylab('Absolute 10-year AML/MDS Risk')
p_zoom <- p+labs(fill = "Cytotoxic Therapy") + theme(legend.position = c(0.2, 0.7)) + scale_fill_manual(values = therapy_colors,labels = c("Yes", "No")) + scale_color_manual(values = therapy_colors)
do_plot(p_zoom, "figure4c_box.svg", w = 6, h = 3)
tally_variable = function(D, baseline_cols, variable) {
if (variable == 'Total') {
res = cbind(
D %>% dcast(. ~ CH_all, fun.aggregate = length, value.var = 'STUDY_ID') %>% select(-.),
D %>% dcast(. ~ therapy_binary, fun.aggregate = length, value.var = 'STUDY_ID') %>% select(-.)
) %>%
mutate(Total = rowSums(.[1:2])) %>%
mutate(variable = 'Total', category = 'Total') %>%
select(baseline_cols) %>%
mutate(ref = "")
} else {
ref = levels(D[[variable]])[1]
if (is.null(ref)) {
ref = ""
}
res = cbind(
D %>% dcast(paste0(variable, ' ~ CH_all'), fun.aggregate = length, value.var = 'STUDY_ID') %>%
mutate(variable = variable) %>% rename(category = !!variable),
D %>% dcast(paste0(variable, ' ~ therapy_binary'), fun.aggregate = length, value.var = 'STUDY_ID'),
D %>% count(get(variable)) %>% select(n) %>% rename(Total = n)
) %>%
select(baseline_cols) %>%
arrange(category == 'Missing') %>% # sorting
mutate(ref = ref)
}
res = res %>%
mutate(`CH-` = paste0(`CH-`, ' (', signif(`CH-`* 100/Total, 2), '%)')) %>%
mutate(`CH+` = paste0(`CH+`, ' (', signif(`CH+`* 100/Total, 2), '%)'))
return(res)
}
D = M_wide_all %>%
mutate(CH_all = ifelse(CH_all, 'CH+', 'CH-')) %>%
mutate(age_verbose = case_when(
is.na(age_cat) ~ 'Missing',
T ~ paste0((age_cat - 1) * 10, '-', age_cat * 10))
) %>%
mutate(smoke_verbose = case_when(
smoke_bin == 1 ~ "Current/Former",
smoke_bin == 0 ~ "Non Smoker",
T ~ 'Missing')
) %>%
mutate(smoke_verbose = relevel(factor(smoke_verbose), ref = 'Non Smoker')) %>%
mutate(therapy_binary = ifelse(is.na(therapy_binary), 'Unknown', therapy_binary)) %>%
mutate(therapy_binary = factor(therapy_binary, c("Treated", "Untreated", "Unknown"))) %>%
mutate(Gender = relevel(factor(Gender), ref = 'Male')) %>%
mutate(generaltumortype = ifelse(
generaltumortype == '' | is.na(generaltumortype), 'Missing', generaltumortype)
) %>%
mutate(generaltumortype = format_variable(generaltumortype))
baseline_cols = c('variable', 'category', 'CH-', 'CH+', 'Total')
rbind(
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'Total'),
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'smoke_verbose'),
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'Gender'),
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'age_verbose'),
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'race'),
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'therapy_binary')
) %>%
mutate(variable = ifelse(variable == 'generaltumortype', 'Primary Tumor Subtype', variable)) %>%
mutate(variable = format_variable(variable)) %>%
select(-ref) %>%
kable(format = "latex", booktabs = T) %>%
kable_styling(
latex_options = c("hold_position"),
full_width = T,
font_size = 10
) %>%
column_spec(1, width = 10) %>%
column_spec(2:5, width = 70) %>%
collapse_rows(columns = 1:2, row_group_label_position = 'stack') %>%
display_kable('table1.1.pdf')
D %>% tally_variable(baseline_cols = baseline_cols, variable = 'generaltumortype') %>%
mutate(variable = ifelse(variable == 'generaltumortype', 'Primary Tumor Subtype', variable)) %>%
mutate(variable = format_variable(variable)) %>%
select(-ref) %>%
kable(format = "latex", booktabs = T) %>%
kable_styling(
latex_options = c("hold_position"),
full_width = T,
font_size = 10
) %>%
column_spec(1, width = 10) %>%
column_spec(2, width = 200) %>%
column_spec(3:7, width = 50) %>%
collapse_rows(columns = 1:2, row_group_label_position = 'stack') %>%
display_kable('table1.2.pdf')
#Supplementary Table 1
D = M_wide_all %>%
mutate(therapy_known = ifelse(therapy_known, 'Yes', 'No')) %>%
mutate(age_verbose = case_when(
is.na(age_cat) ~ 'Missing',
T ~ paste0((age_cat - 1) * 10, '-', age_cat * 10))
) %>%
mutate(smoke_verbose = case_when(
smoke_bin == 1 ~ "Current/Former",
smoke_bin == 0 ~ "Non Smoker",
T ~ 'Missing')
) %>%
mutate(smoke_verbose = relevel(factor(smoke_verbose), ref = 'Non Smoker')) %>%
mutate(Gender = relevel(factor(Gender), ref = 'Male')) %>%
mutate(generaltumortype = ifelse(
generaltumortype == '' | is.na(generaltumortype), 'Missing', generaltumortype)
) %>%
mutate(generaltumortype = format_variable(generaltumortype))
label(D$age_verbose) <- "Age(y)"
label(D$smoke_verbose) <- "Smoking"
table1(~ age_verbose + Gender + smoke_verbose | therapy_known, data=D, output="markdown", export="ktab")
| No (N=14008) |
Yes (N=10138) |
Overall (N=24146) |
|
|---|---|---|---|
| Age(y) | |||
| 0-10 | 229 (1.6%) | 108 (1.1%) | 337 (1.4%) |
| 10-20 | 177 (1.3%) | 120 (1.2%) | 297 (1.2%) |
| 20-30 | 423 (3.0%) | 285 (2.8%) | 708 (2.9%) |
| 30-40 | 884 (6.3%) | 635 (6.3%) | 1519 (6.3%) |
| 40-50 | 1732 (12.4%) | 1445 (14.3%) | 3177 (13.2%) |
| 50-60 | 3257 (23.3%) | 2531 (25.0%) | 5788 (24.0%) |
| 60-70 | 4056 (29.0%) | 3018 (29.8%) | 7074 (29.3%) |
| 70-80 | 2576 (18.4%) | 1643 (16.2%) | 4219 (17.5%) |
| 80-90 | 674 (4.8%) | 353 (3.5%) | 1027 (4.3%) |
| Gender | |||
| Male | 6439 (46.0%) | 4586 (45.2%) | 11025 (45.7%) |
| Female | 7569 (54.0%) | 5552 (54.8%) | 13121 (54.3%) |
| Smoking | |||
| Non Smoker | 6920 (49.4%) | 5145 (50.7%) | 12065 (50.0%) |
| Current/Former | 6160 (44.0%) | 4989 (49.2%) | 11149 (46.2%) |
| Missing | 928 (6.6%) | 4 (0.0%) | 932 (3.9%) |
table1(~ generaltumortype | therapy_known, data=D, output="markdown", export="ktab")
| No (N=14008) |
Yes (N=10138) |
Overall (N=24146) |
|
|---|---|---|---|
| generaltumortype | |||
| Adrenocortical Carcinoma | 41 (0.3%) | 16 (0.2%) | 57 (0.2%) |
| Ampullary Carcinoma | 35 (0.2%) | 27 (0.3%) | 62 (0.3%) |
| Anal Cancer | 34 (0.2%) | 23 (0.2%) | 57 (0.2%) |
| Appendiceal Cancer | 80 (0.6%) | 82 (0.8%) | 162 (0.7%) |
| Biliary Cancer | 251 (1.8%) | 257 (2.5%) | 508 (2.1%) |
| Bladder Cancer | 489 (3.5%) | 223 (2.2%) | 712 (2.9%) |
| Breast Carcinoma | 2151 (15.4%) | 1389 (13.7%) | 3540 (14.7%) |
| Breast Sarcoma | 37 (0.3%) | 7 (0.1%) | 44 (0.2%) |
| Cancer of Unknown Primary | 412 (2.9%) | 311 (3.1%) | 723 (3.0%) |
| Cervical Cancer | 75 (0.5%) | 43 (0.4%) | 118 (0.5%) |
| CHondroblastoma | 1 (0.0%) | 0 (0%) | 1 (0.0%) |
| CHondrosarcoma | 40 (0.3%) | 14 (0.1%) | 54 (0.2%) |
| CHordoma | 29 (0.2%) | 7 (0.1%) | 36 (0.1%) |
| CHoroid Plexus Tumor | 3 (0.0%) | 0 (0%) | 3 (0.0%) |
| Colorectal Cancer | 1093 (7.8%) | 1060 (10.5%) | 2153 (8.9%) |
| Embryonal Tumor | 113 (0.8%) | 58 (0.6%) | 171 (0.7%) |
| Endometrial Cancer | 446 (3.2%) | 385 (3.8%) | 831 (3.4%) |
| Ependymomal Tumor | 22 (0.2%) | 7 (0.1%) | 29 (0.1%) |
| Esophagogastric Carcinoma | 227 (1.6%) | 433 (4.3%) | 660 (2.7%) |
| Ewing Sarcoma | 29 (0.2%) | 45 (0.4%) | 74 (0.3%) |
| Gastrointestinal Neuroendocrine Tumor | 62 (0.4%) | 45 (0.4%) | 107 (0.4%) |
| Gastrointestinal Stromal Tumor | 200 (1.4%) | 84 (0.8%) | 284 (1.2%) |
| Germ Cell Tumor | 230 (1.6%) | 157 (1.5%) | 387 (1.6%) |
| Gestational Trophoblastic Disease | 8 (0.1%) | 5 (0.0%) | 13 (0.1%) |
| Glioma | 664 (4.7%) | 430 (4.2%) | 1094 (4.5%) |
| Head and Neck Carcinoma | 187 (1.3%) | 176 (1.7%) | 363 (1.5%) |
| Hepatocellular Carcinoma | 120 (0.9%) | 69 (0.7%) | 189 (0.8%) |
| Melanoma | 693 (4.9%) | 188 (1.9%) | 881 (3.6%) |
| Meningothelial Tumor | 54 (0.4%) | 12 (0.1%) | 66 (0.3%) |
| Mesothelioma | 115 (0.8%) | 109 (1.1%) | 224 (0.9%) |
| Miscellaneous Brain Tumor | 20 (0.1%) | 6 (0.1%) | 26 (0.1%) |
| Miscellaneous Neuroepithelial Tumor | 14 (0.1%) | 3 (0.0%) | 17 (0.1%) |
| Missing | 82 (0.6%) | 27 (0.3%) | 109 (0.5%) |
| Nerve Sheath Tumor | 35 (0.2%) | 14 (0.1%) | 49 (0.2%) |
| Non-Small Cell Lung Cancer | 1837 (13.1%) | 1722 (17.0%) | 3559 (14.7%) |
| Osteosarcoma | 51 (0.4%) | 58 (0.6%) | 109 (0.5%) |
| Ovarian Cancer | 315 (2.2%) | 350 (3.5%) | 665 (2.8%) |
| Pancreatic Cancer | 655 (4.7%) | 761 (7.5%) | 1416 (5.9%) |
| Penile Cancer | 8 (0.1%) | 1 (0.0%) | 9 (0.0%) |
| PheoCHromocytoma | 6 (0.0%) | 1 (0.0%) | 7 (0.0%) |
| Pineal Tumor | 3 (0.0%) | 1 (0.0%) | 4 (0.0%) |
| Prostate Cancer | 1014 (7.2%) | 480 (4.7%) | 1494 (6.2%) |
| Renal Cell Carcinoma | 413 (2.9%) | 157 (1.5%) | 570 (2.4%) |
| Renal Non-Clear Cell Carcinoma | 3 (0.0%) | 0 (0%) | 3 (0.0%) |
| Retinoblastoma | 26 (0.2%) | 14 (0.1%) | 40 (0.2%) |
| Salivary Carcinoma | 169 (1.2%) | 44 (0.4%) | 213 (0.9%) |
| Sellar Tumor | 53 (0.4%) | 7 (0.1%) | 60 (0.2%) |
| Sex Cord Stromal Tumor | 32 (0.2%) | 4 (0.0%) | 36 (0.1%) |
| Skin Cancer, Non-Melanoma | 178 (1.3%) | 50 (0.5%) | 228 (0.9%) |
| Small Bowel Cancer | 40 (0.3%) | 46 (0.5%) | 86 (0.4%) |
| Small Cell Lung Cancer | 97 (0.7%) | 115 (1.1%) | 212 (0.9%) |
| Soft Tissue Sarcoma | 640 (4.6%) | 300 (3.0%) | 940 (3.9%) |
| Thymic Tumor | 35 (0.2%) | 15 (0.1%) | 50 (0.2%) |
| Thyroid Cancer | 204 (1.5%) | 228 (2.2%) | 432 (1.8%) |
| Uterine Sarcoma | 111 (0.8%) | 59 (0.6%) | 170 (0.7%) |
| Vaginal Cancer | 10 (0.1%) | 5 (0.0%) | 15 (0.1%) |
| Wilms Tumor | 16 (0.1%) | 8 (0.1%) | 24 (0.1%) |
M_long = M_long %>%
mutate(myeloid_category = case_when(
CH_my == 1 ~ "Myeloid Gene",
CH_my == 0 ~ "Non-Myeloid Gene"
)) %>%
mutate(pd_category = case_when(
ch_my_pd == 1 ~ "Myeloid PD",
ch_my_pd == 0 & ch_pancan_pd == 1 ~ "Non-Myeloid PD",
TRUE ~ "Non PD"
)) %>%
mutate(
VariantClass2 = case_when(
VariantClass == 'Frame_Shift_Del' ~ 'Frameshift indel',
VariantClass == 'Frame_Shift_Ins' ~ 'Frameshift indel',
VariantClass == 'In_Frame_Del' ~ 'Inframe indel',
VariantClass == 'In_Frame_Ins' ~ 'Inframe indel',
VariantClass == 'Splice_Site' ~ 'Splice or other',
VariantClass == 'Splice_Region' ~ 'Splice or other',
VariantClass == "5'Flank" ~ 'Splice or other',
VariantClass == 'Translation_Start_Site' ~ 'Splice or other',
VariantClass == 'Nonstop_Mutation' ~ 'Splice or other',
VariantClass == 'Missense_Mutation' ~ 'Missense',
VariantClass == 'Nonsense_Mutation' ~ 'Nonsense',
T ~ VariantClass)
)
font_size = 8
lab_pos = 'out'
lab_color = 'black'
panel_theme = theme(
legend.direction = 'vertical',
legend.position="top",
legend.text = element_text(size=12),
plot.margin = margin(t = 0, r = 0, b = 0, l = 0),
axis.text = element_text(size = font_size)
)
p1 = ggdonutchart(
data = M_long %>% filter(myeloid_category != "NA") %>% count(myeloid_category),
x = "n",
fill = "myeloid_category",
color = "white",
lab.pos = lab_pos,
palette = pal_nejm('default')(8),
lab.font = c(lab_color, font_size),
size = 0.5) +
labs(fill = "Gene Class") +
labs(title = "B") +
panel_theme
p2 = ggdonutchart(
data = M_long %>% count(pd_category),
x = "n",
fill = "pd_category",
color = "white",
lab.pos = lab_pos,
palette = pal_nejm('default')(8),
lab.font = c(lab_color, font_size),
size = 0.5) +
labs(fill = "Mutation Driver Status") +
labs(title = "C") +
panel_theme
p3 = ggdonutchart(
data = M_long %>% count(VariantClass2),
x = "n",
fill = "VariantClass2",
color = "white",
lab.pos = lab_pos,
palette = pal_nejm('default')(8),
lab.font = c(lab_color, font_size),
size = 0.5) +
labs(fill = "Variant Effect") +
labs(title = "D") +
panel_theme
p4 = ggdonutchart(
data = M_long %>% count(variant_type),
x = "n",
fill = "variant_type",
color = "white",
lab.pos = lab_pos,
palette = pal_nejm('default')(8),
lab.font = c(lab_color, font_size),
size = 0.5) +
labs(fill = "Variant Type") +
labs(title = "E") +
panel_theme
p5 = ggdonutchart(
data = M_long %>% filter(variant_type == 'SNV') %>% count(sub_nuc),
x = "n",
fill = "sub_nuc",
color = "white",
lab.pos = lab_pos,
palette = pal_nejm('default')(8),
lab.font = c(0.1, "plain", lab_color),
size = 0.5) +
labs(fill = "Nucleotide Change (SNVs)") +
labs(title = "F") +
panel_theme
panel = (p1 | p2 | p3 | p4 | p5)
#Top Genes
gene_list = M_long %>% count(Gene) %>% arrange(-n) %>% .$Gene %>% unique %>% .[1:30]
genes <- ggplot(
M_long %>% filter(Gene %in% gene_list) %>%
mutate(Gene = factor(Gene, gene_list)) %>%
count(Gene, VariantClass2),
aes(x = Gene, y = n, fill = VariantClass2)
) +
geom_bar(stat = 'identity', color = 'black', size = 0.2) +
ylab("Number of Mutations") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "white"),
legend.title = element_blank()
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_nejm() + scale_color_nejm() + labs(title = 'A')
combo = (genes / panel) + plot_layout(ncol = 1, heights = c(1, 1))
do_plot(p = combo, f = "sfig2.png", w = 14, h = 8, save_pdf = F) #no pdf
rare_tumors <- M_wide_all %>%
group_by(generaltumortype) %>%
summarise(count=n_distinct(STUDY_ID)) %>%
arrange(-count) %>%
filter(count<100) %>%
pull(generaltumortype) %>%
as.character()
D <- M_wide_all %>% mutate(generaltumortype_r=ifelse(generaltumortype %in% rare_tumors,"Other",as.character(generaltumortype)))
D <- D %>% mutate(generaltumortype_r = relevel(factor(generaltumortype_r), "Breast Carcinoma"))
N <- as.data.frame(table(D$generaltumortype_r)) %>% dplyr::rename(term = Var1)
logit <- D %>%
glm(formula = ch_pancan_pd ~ age + generaltumortype_r, family = binomial(link="logit"), na.action = 'na.omit')
results <- logit %>% get_model_data(type="est")
results <- results %>% filter(term!="age") %>%
mutate(term=str_remove(term, "generaltumortype_r")) %>%
arrange(estimate) %>%
mutate(
q.value = p.adjust(p.value, n = nrow(.)),
q.label = paste0(signif(estimate, 2), signif.num(q.value)))
results_bind <- left_join(results,N)
results_bind <- results_bind %>% mutate(term = paste0(term, ' (n = ', Freq, ')'))
results_bind <- results_bind %>% mutate(term = factor(term, levels = term))
p = plot_forest(
results_bind,
x = "term",
eb_w = 0,
eb_s = 1,
ps = 2,
or_s = 3,
label = 'p.stars'
) +
xlab('') + ylab('OR of CH-PD') +
scale_fill_nejm() +
scale_color_nejm() +
scale_x_discrete(expand = c(0.025, 0))
do_plot(p, "efig1.png", 8, 5, save_pdf = F) #no pdf
options(repr.plot.width = 10, repr.plot.height = 6, repr.plot.res = 300)
n_dict = M_wide_all %>% count(generaltumortype) %>%
arrange(-n) %>% {setNames(.$n, .$generaltumortype)}
top_genes <- M_long %>%
group_by(Gene) %>%
summarise(count=n_distinct(STUDY_ID)) %>%
arrange(-count) %>%
filter(count>75) %>%
pull(Gene) %>%
as.character()
gene_list <- M_long %>%
group_by(Gene) %>%
summarise(count=n_distinct(STUDY_ID)) %>%
arrange(-count) %>%
filter(count>75) %>%
pull(Gene)
tumor_list = M_wide_all %>% count(generaltumortype) %>%
arrange(-n) %>%
top_n(12) %>%
pull(generaltumortype) %>%
as.character()
D <- M_long %>%
filter(Gene %in% top_genes) %>%
count(generaltumortype, Gene) %>%
filter(!is.na(generaltumortype)) %>%
group_by(generaltumortype) %>%
mutate(prop = n/n_dict[generaltumortype]) %>%
filter(generaltumortype %in% tumor_list) %>%
ungroup() %>%
mutate(Gene = factor(Gene, top_genes))
D_wide <- M_wide %>% filter(generaltumortype %in% tumor_list)
p <- D %>%
ggplot(
aes(x = Gene, y = prop, fill = generaltumortype)
) +
geom_bar(stat = 'identity', color = 'black', size = 0.2, position = position_dodge()) +
theme_bw() +
theme(
legend.position = "top",
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "white"),
legend.title = element_blank()
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Paired") +
ylab('Proportion With Mutation')
do_plot(p, "efig2.png", 8, 5, save_pdf = F) #no pdf
#Proportion of mutations by primary tumor site in untreated patients
options(repr.plot.width = 10, repr.plot.height = 6, repr.plot.res = 300)
D <- M_wide %>% filter(generaltumortype=="Endometrial Cancer" & therapy_binary=="untreated")
table(D$PPM1D)
##
## 0
## 151
D <- M_wide %>% filter(generaltumortype=="Ovarian Cancer" & therapy_binary=="untreated")
table(D$PPM1D)
##
## 0 1
## 23 2
font_size = 13
tumor_list = M_wide_all %>% count(generaltumortype) %>%
arrange(-n) %>%
top_n(15) %>%
pull(generaltumortype) %>%
as.character()
D = M_wide %>%
filter(generaltumortype %in% tumor_list) %>%
mutate(treatment_type = case_when(
XRT == 1 & ind_anychemo == 1 ~ "XRT & Systemic",
XRT == 1 ~ "XRT",
ind_anychemo == 1 ~ "Systemic Therapy",
T ~ "No treatment")) %>%
mutate(
generaltumortype = factor(
generaltumortype,
levels = names(sort(table(generaltumortype), decreasing = F)))
)
p1 = ggplot(D) +
geom_bar(aes(x = generaltumortype, fill = treatment_type), color = 'black') +
theme(
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = font_size),
axis.text.y = element_text(size = font_size),
legend.position = c(0.8, 0.3),
plot.title = element_text(hjust = .5, size = 16),
axis.title=element_text(size = 14),
legend.text = element_text(size = font_size),
legend.title = element_blank()
) +
ylab("Frequency") +
xlab("") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip()+
scale_fill_manual(values = c(
"No treatment" = 'gray',
'Systemic Therapy' = pal_npg()(4)[1],
'XRT & Systemic' = pal_npg()(4)[2],
'XRT' = pal_npg()(4)[3]
)
) +
scale_color_nejm()
D = M_wide %>% mutate(
Treatment = case_when(
ind_cytotoxic_therapy == 1 & ind_immune_therapy == 0 &
ind_targeted_therapy == 0 & ind_radiotherapy == 0 ~ "Only Cytotoxic",
ind_cytotoxic_therapy == 0 & ind_immune_therapy == 1 &
ind_targeted_therapy == 0 & ind_radiotherapy == 0 ~ "Only Immune",
ind_cytotoxic_therapy == 0 & ind_immune_therapy == 1 &
ind_targeted_therapy == 1 & ind_radiotherapy == 0 ~ "Only Targeted",
ind_cytotoxic_therapy == 0 & ind_immune_therapy == 0 &
ind_targeted_therapy == 0 & ind_radiotherapy == 1 ~ "Only Radiotherapy",
ind_anychemo == 0 ~ "NA",
T ~ "Multiple"
)) %>%
filter(Treatment != "NA") %>%
filter(generaltumortype %in% tumor_list) %>%
mutate(
generaltumortype = factor(
generaltumortype,
levels = names(sort(table(generaltumortype), decreasing = F)))
)
p2 = ggplot(D) +
geom_bar(aes(x = generaltumortype, fill = Treatment), color = 'black') +
xlab("") +
theme(
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = font_size),
axis.text.y = element_text(size = font_size),
legend.position = c(0.8,0.3),
plot.title = element_text(hjust = .5, size = 16),
axis.title=element_text(size = 14),
legend.text = element_text(size = 13),
legend.title = element_blank()
) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip() +
scale_fill_nejm() +
scale_color_nejm()
options(repr.plot.width = 10, repr.plot.height = 5, repr.plot.res = 300)
exclude_list = c(
"ind_carboplatin", "ind_oxaliplatin", "ind_cisplatin",
"ind_gnrh_antagonist", "ind_anychemo", "ind_cytotoxic_therapy",
"ind_hormonal_therapy", "ind_antiestrogen", "ind_targeted_therapy",
"ind_nitrogen_mustard", "ind_vegf_inhibitor", "ind_aromatase_inhibitor",
"ind_immune_checkpoint_in", "ind_anti_pd1_antibody", "ind_carbo_pacli",
"ind_antiandrogen", "ind_camptothecins", "ind_epipodophyllotoxins",
"ind_vinca_alkaloids", 'ind_protein_kinase_inhib', 'ind_vascular_targeted_th',
'ind_immune_therapy'
)
D = M_wide %>%
select(str_subset(colnames(.), '^ind')) %>%
replace(., is.na(.), 0) %>%
colSums() %>%
as.data.frame %>%
setNames('freq') %>%
tibble::rownames_to_column('drug') %>%
arrange(-freq) %>%
filter(!(drug %in% exclude_list)) %>%
filter(!str_detect(drug, regex('_ds_|post'))) %>%
mutate(drug = format_variable(drug)) %>%
mutate(drug = factor(drug, drug))
p3 = ggplot(
D[1:10,]
) +
geom_col(
aes(x = drug, y = freq), color = 'black'
) +
theme(
plot.title = element_text(hjust = .5, size = 16),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = font_size),
axis.title = element_text(size=14),
axis.text.y = element_text(size=13)
) +
xlab("") +
ylab("Frequency") +
scale_fill_nejm() +
scale_color_nejm() +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0))
p <- egg::ggarrange(
p1, p2, p3,
labels = c('A', 'B', 'C'),
label.args = list(gp = grid::gpar(fontface = 'bold', fontsize = 15)),
draw = F
)
do_plot(p, "efig3.png", w = 12, h = 12, save_pdf = F) #no pdf
#number of pts recieving multiple classes of cytotoxic therapy
c <- M_wide %>% mutate(sum=ind_platinum+ind_antimetabolite+ind_nucleoside_analogue+ind_taxane+ind_topoisomerase_ii_inh+ind_alkylating_agent+ind_anthracycline+ind_topoisomerase_i_inhi+ind_folic_acid_analog+ind_microtubule_damaging)
c <- c %>% mutate(mult=ifelse(sum>1,2,sum))
table(c$mult)
##
## 0 1 2
## 5208 439 4491
#All
D = M_wide %>%
mutate(smoke_verbose = ifelse(smoke_bin == 1, 'Smoker', 'Non-smoker')) %>%
mutate(smoke_verbose = factor(smoke_verbose, c('Non-smoker', 'Smoker'))) %>%
mutate(age = age_d)
summary_all = glm(
data=D,
formula = CH_nonsilent ~ age + Gender + smoke_verbose + race + therapy_binary,
family = binomial(link = "logit"),
na.action = 'na.omit'
) %>% summar(CI = T, truncate_p = T)
#PD and non-PD
summary_pd = D %>%
glm(formula = ch_pancan_pd ~ age + Gender + smoke_verbose + race + therapy_binary,
family = binomial(link = "logit"), na.action = 'na.omit') %>%
summar(CI = T, truncate_p = T)
summary_nonpd = D %>%
glm(formula = ch_nonpd ~ age + Gender + smoke_verbose + race + therapy_binary,
family = binomial(link = "logit"), na.action = 'na.omit') %>%
summar(CI = T, truncate_p = T)
# PD vs non PD
summary_pd_het = D %>% filter(CH_all == 1) %>%
glm(formula = ch_pancan_pd ~ age + Gender + smoke_verbose + race + therapy_binary,
family = binomial(link="logit"), na.action = 'na.omit') %>%
summar(CI = T, truncate_p = T) %>%
select(variable, levels, pval)
## all, PD
summary = Reduce(
f = function(x, y) {
full_join(x, y, by = c('variable', 'levels'))
},
x = list(
summary_all,
summary_pd, summary_nonpd, summary_pd_het)
) %>%
mutate(levels = ifelse(levels == 'Age', '-', levels)) %>%
rename(`variable (ref)` = variable)
# rename_all(
# function(x){str_remove(x, '(.x)+|(.y)+')}
# )
summary %>%
kable(format = "latex", booktabs = T, align = 'l', escape = F) %>%
kable_styling(
latex_options = c("hold_position"),
full_width = T,
font_size = 10
) %>%
column_spec(1, width = 100) %>%
column_spec(2:50, width = 45) %>%
add_header_above(
c(" " = 2, "All CH" = 3, "Myeloid CH" = 3, "Non-Myeloid CH" = 3,
"Heterogeneity" = 1)
) %>%
collapse_rows(columns = 1:2, row_group_label_position = 'identity') %>%
display_kable('etable2.pdf')
D <- M_wide
#ALL
summary_all = D %>%
glm(formula = CH_nonsilent ~ age + Gender + smoke + race + therapy_binary,
family = binomial(link = "logit"), na.action = 'na.omit') %>%
get_model_data(type="est")
#ASXL1
D %>%
glm(formula = ASXL1 ~ age + Gender + smoke + race + therapy_binary,
family = binomial(link = "logit"), na.action = 'na.omit') %>%
get_model_data(type="est")
## term estimate std.error conf.low conf.high statistic
## 1 age 1.0739279 0.0090770 1.05499099 1.0932046 7.8575323
## 2 GenderFemale 0.5930150 0.1918257 0.40717703 0.8636704 -2.7240121
## 3 smoke1 3.1198996 0.3463580 1.58241426 6.1512171 3.2850431
## 4 smoke2 2.4236429 0.2276672 1.55123637 3.7866860 3.8884469
## 5 raceAsian 0.8047032 0.4272980 0.34827241 1.8593127 -0.5085016
## 6 raceBlack 0.1592018 1.0075356 0.02209694 1.1470011 -1.8238389
## 7 raceMissing 0.5381015 0.7195313 0.13134033 2.2046029 -0.8612662
## 8 raceOther 0.8930058 0.4649406 0.35900143 2.2213266 -0.2433906
## 9 therapy_binarytreated 1.1516425 0.1868239 0.79853297 1.6608963 0.7557339
## den.df p.value p.stars p.label group xpos xmin xmax
## 1 Inf 3.917751e-15 *** 1.07 *** pos 9 8.825 9.175
## 2 Inf 6.449414e-03 ** 0.59 ** neg 8 7.825 8.175
## 3 Inf 1.019668e-03 ** 3.12 ** pos 7 6.825 7.175
## 4 Inf 1.008877e-04 *** 2.42 *** pos 6 5.825 6.175
## 5 Inf 6.111016e-01 0.80 neg 5 4.825 5.175
## 6 Inf 6.817643e-02 0.16 neg 4 3.825 4.175
## 7 Inf 3.890914e-01 0.54 neg 3 2.825 3.175
## 8 Inf 8.077028e-01 0.89 neg 2 1.825 2.175
## 9 Inf 4.498088e-01 1.15 pos 1 0.825 1.175
# heterogenetiy among smokers
D %>% filter(smoke_bin == 1) %>%
glm(formula = ASXL1 ~ age + Gender + smoke + race + therapy_binary,
family = binomial(link="logit"), na.action = 'na.omit') %>%
get_model_data(type="est")
## term estimate std.error conf.low conf.high statistic
## 1 age 1.0674523 0.01062001 1.04546311 1.0899040 6.1463979
## 2 GenderFemale 0.6473675 0.21824380 0.42206704 0.9929339 -1.9924558
## 3 smoke2 0.8060144 0.30808953 0.44065276 1.4743108 -0.6999710
## 4 raceAsian 0.6780183 0.59810495 0.20995856 2.1895219 -0.6496869
## 5 raceBlack 0.2143903 1.00995679 0.02961613 1.5519650 -1.5247752
## 6 raceMissing 0.6909964 0.72348745 0.16735632 2.8530501 -0.5108875
## 7 raceOther 1.2370929 0.47121226 0.49125356 3.1152931 0.4515252
## 8 therapy_binarytreated 1.1366987 0.21095479 0.75176189 1.7187408 0.6073725
## den.df p.value p.stars p.label group xpos xmin xmax
## 1 Inf 7.926231e-10 *** 1.07 *** pos 8 7.825 8.175
## 2 Inf 4.632107e-02 * 0.65 * neg 7 6.825 7.175
## 3 Inf 4.839454e-01 0.81 neg 6 5.825 6.175
## 4 Inf 5.158945e-01 0.68 neg 5 4.825 5.175
## 5 Inf 1.273152e-01 0.21 neg 4 3.825 4.175
## 6 Inf 6.094298e-01 0.69 neg 3 2.825 3.175
## 7 Inf 6.516111e-01 1.24 pos 2 1.825 2.175
## 8 Inf 5.436037e-01 1.14 pos 1 0.825 1.175
treatment_palette = c(pal_nejm()(2)[2], pal_nejm()(2)[1])
D = M %>% filter(VAF_N < 0.35) %>%
filter(CH_nonsilent == 1) %>%
mutate(CH_my = ifelse(CH_my == 1, 'Myeloid gene', 'Non-myeloid gene')) %>%
mutate(pd_status = ifelse(ch_pancan_pd == 1, 'PD', 'Non-PD')) %>%
mutate(pd_status = factor(pd_status))
D = D %>% group_by(STUDY_ID) %>% dplyr::mutate(mutnum=n())
D = D %>% mutate(mutnum = ifelse(mutnum > 1, '2+', as.character(mutnum)))
panel_theme = theme_bw() + theme(
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank(),
plot.title = element_text(size = 8)
)
#Myeloid
model_my = geeglm(
data = D,
formula = log(VAF_N) ~ age_scaled + race_b + smoke_bin + therapy_binary + CH_my,
family = gaussian,
corstr = "exchangeable",
id = STUDY_ID)
pval_my = model_my %>% summary %>% coefficients %>% .['CH_myNon-myeloid gene', 'Pr(>|W|)']
df <- D %>% group_by(CH_my) %>% summarise(median=median(VAF_N))
df
## # A tibble: 2 x 2
## CH_my median
## <chr> <dbl>
## 1 Myeloid gene 0.0469
## 2 Non-myeloid gene 0.0380
p_my = ggplot(
D,
aes(y = VAF_N, x = CH_my, fill = therapy_binary)
) +
geom_boxplot(outlier.alpha = 0) +
geom_point(position = position_jitterdodge(), pch = 21, color = 'black', size = 1) +
panel_theme + xlab("") + ylab('VAF') +
scale_fill_manual(values = treatment_palette) +
geom_signif(
comparisons = list(levels(as.factor(D$CH_my))),
y_position = 0.36,
vjust = -0.5,
tip_length = 0.01,
#annotation = paste0('p = ', scientific(pval_my, digits = 2)),
#CHANGE THIS
annotation = paste0('p = <1x10-6'),
map_signif_level = TRUE,
textsize = 3
) +
scale_y_continuous(expand = c(0.1, 0))
model_pd = geeglm(
data = D,
formula = log(VAF_N) ~ age_scaled + race + smoke_bin + therapy_binary + ch_pancan_pd,
family = gaussian,
corstr = "exchangeable",
id = STUDY_ID)
pval_pd = model_pd %>% summary %>% coefficients %>% .['ch_pancan_pd', 'Pr(>|W|)']
p_pd = ggplot(
D,
aes(y = VAF_N, x = pd_status, fill = therapy_binary)
) +
geom_boxplot(outlier.alpha = 0) +
geom_point(position = position_jitterdodge(), pch = 21, color = 'black', size = 1) +
panel_theme + xlab("") + ylab('VAF') +
scale_fill_manual(values = treatment_palette) +
geom_signif(
comparisons = list(levels(D$pd_status)),
y_position = 0.36,
vjust = -0.5,
tip_length = 0.01,
#annotation = paste0('p = ', scientific(pval_pd, digits = 2)),
#CHANGE THIS
annotation = paste0('p = <1x10-6'),
map_signif_level = TRUE,
textsize = 3
) +
scale_y_continuous(expand = c(0.1, 0))
model_mutnum = geeglm(
data = D,
formula = log(VAF_N) ~ age_scaled + race + smoke_bin + therapy_binary + mutnum,
family = gaussian,
corstr = "exchangeable",
id = STUDY_ID)
pval_mutnum = model_mutnum %>% summary %>% coefficients %>% .['mutnum2+', 'Pr(>|W|)']
p_mutnum = ggplot(
D,
aes(x = factor(mutnum), y = VAF_N, fill = therapy_binary)
) +
geom_boxplot(outlier.alpha = 0) +
theme_bw() +
theme(
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
strip.background = element_rect(fill = "white", color = 'white'),
legend.title = element_blank()
) +
ylab('VAF') +
geom_point(position = position_jitterdodge(), pch = 21, color = 'black', size = 1) +
scale_fill_manual(values = treatment_palette) +
xlab('Number of mutations') +
geom_signif(
comparisons = list(levels(factor(D$mutnum))),
y_position = 0.36,
vjust = -0.5,
tip_length = 0.01,
annotation = paste0('p = ', scientific(pval_mutnum, digits = 2)),
map_signif_level = TRUE,
textsize = 3
) +
scale_y_continuous(expand = c(0.1, 0))
panel <- ggarrange(
p_my, p_pd, p_mutnum,
ncol = 3,
nrow = 1,
common.legend = TRUE,
legend = "top", align = 'hv',
labels = c('A', 'B', 'C')
)
do_plot(panel, "SuppFig3.png", w = 9, h = 4, save_pdf = F) #no pdf
#calculate medians
df <- D %>% group_by(CH_my) %>% summarise(median=median(VAF_N))
df <- D %>% group_by(ch_pancan_pd) %>% summarise(median=median(VAF_N))
D = M %>%
filter(!is.na(smoke_bin)) %>%
mutate(
smoke_bin = factor(ifelse(smoke_bin == 1, 'Smoker', 'Non-smoker'), c('Non-smoker', 'Smoker'))
) %>%
mutate(
pd_status = case_when(
ch_my_pd==1 ~ "Myeloid PD",
ch_nonmy_pd==1 ~ "Non-Myeloid PD",
ch_my_pd==0 & ch_nonmy_pd==0 & myeloid_gene==1 ~ "Non-PD Myeloid",
ch_my_pd==0 & ch_nonmy_pd==0 & myeloid_gene==0 ~ "Non-PD Non-Myeloid"
)
) %>%
mutate(pd_status = relevel(factor(pd_status), ref = 'Non-PD Non-Myeloid')) %>%
group_by(STUDY_ID) %>%
mutate(mutnum = as.character(min(n(), 2))) %>%
mutate(age = age_d) %>%
ungroup()
summary_multi = geeglm(
data = D,
formula = log(VAF_N) ~ age + race + smoke_bin + therapy_binary + pd_status + mutnum,
family = gaussian,
# corstr = "exchangeable",
id = STUDY_ID
) %>% summar(truncate_p = T)
summary_multi %>%
mutate(levels = ifelse(levels == '2', '2+', levels)) %>%
mutate(levels = ifelse(levels == 'Age', '-', levels)) %>%
rename(`variable (ref)` = variable) %>%
kable(
format = "latex", booktabs = T, align = 'l', escape = F, linesep = ''
) %>%
kable_styling(
latex_options = c("basic", "hold_position"),
full_width = T,
font_size = 10
) %>%
column_spec(1:10, width = 120) %>%
collapse_rows(columns = 1:2, row_group_label_position = 'identity') %>%
display_kable('etable3.pdf')
D = M_wide %>%
mutate(
race_b = factor(ifelse(race_b == 1, 'White', 'Other'), c('White', 'Other')),
smoke_bin = factor(ifelse(smoke_bin == 1, 'Smoker', 'Non-smoker'), c('Non-smoker', 'Smoker')),
age = age_d
)
summary_all = D %>%
filter(mutnum_all > 0) %>%
MASS::polr(
data = .,
formula = factor(mutnum_all) ~ age_scaled + Gender + race_b + smoke_bin + therapy_binary,
Hess = TRUE,
method = "logistic") %>%
summar(truncate_p = T)
summary_all %>%
rename(`variable (ref)` = variable) %>%
mutate(levels = ifelse(levels == 'Age', '-', levels)) %>%
kable(format = "latex", booktabs = T, align = 'l', escape = F) %>%
kable_styling(
latex_options = c("hold_position"),
full_width = T,
font_size = 10
) %>%
column_spec(1, width = 100) %>%
column_spec(2:50, width = 40) %>%
collapse_rows(columns = 1:2, row_group_label_position = 'identity') %>%
display_kable('etable4.pdf')
# Proportion of myeloid mutations
C = M %>%
mutate(myeloid_category = case_when(
CH_my == 1 & CH_nonsilent == 1 ~ "Myeloid Gene",
CH_my == 0 & CH_nonsilent == 1 ~ "Non-Myeloid Gene",
TRUE ~ "NA"
)) %>%
count(myeloid_category, therapy_binary) %>%
filter(myeloid_category != "NA") %>%
group_by(therapy_binary) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total) %>%
mutate(myeloid_category = factor(myeloid_category, levels = c('Myeloid Gene', 'Non-Myeloid Gene')))
label_size = 3
p_my = ggplot(
C,
aes(x = therapy_binary, y = proportion, fill = myeloid_category, label = n)
) +
geom_bar(stat = 'identity') + theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank(),
) + xlab('') + coord_flip() +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white') +
theme(strip.background = element_rect(fill = "white", color = 'white')) +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0))
#(b) Proportion of putative drivers
C = M %>%
mutate(pd_category = case_when(
ch_my_pd == 1 ~ "Myeloid PD",
ch_my_pd == 0 & ch_pancan_pd == 1 ~ "Non-Myeloid PD",
TRUE ~ "Non PD"
)) %>%
count(pd_category, therapy_binary) %>%
filter(pd_category != "NA") %>%
group_by(therapy_binary) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total) %>%
mutate(pd_category = factor(pd_category, levels = c('Myeloid PD', 'Non-Myeloid PD', 'Non PD')))
p_pd = ggplot(
C,
aes(x = therapy_binary, y = proportion, fill = pd_category, label = n)
) +
geom_bar(stat = 'identity') + theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank()
) + xlab('') + coord_flip() +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white') +
theme(strip.background = element_rect(fill = "white", color = 'white')) +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0))
#(c) Proportion of silent mutations
C = M %>%
count(CH_silent, therapy_binary) %>%
group_by(therapy_binary) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total) %>%
mutate(CH_silent = ifelse(CH_silent == 1, 'Silent', 'Non-silent'))
p_silent = ggplot(
C,
aes(x = therapy_binary, y = proportion, fill = CH_silent, label = n)
) +
geom_bar(stat = 'identity') + theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank()
) + xlab('') + ylab('') + coord_flip() +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white') +
theme(strip.background = element_rect(fill = "white", color = 'white')) +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0))
#(d) Variant class
# tally
D = M %>% mutate(therapy_binary = factor(therapy_binary, c('untreated', 'treated'))) %>%
mutate(
VariantClass = case_when(
VariantClass == 'Frame_Shift_Del' ~ 'Frameshift indel',
VariantClass == 'Frame_Shift_Ins' ~ 'Frameshift indel',
VariantClass == 'In_Frame_Del' ~ 'Inframe indel',
VariantClass == 'In_Frame_Ins' ~ 'Inframe indel',
VariantClass == 'Splice_Site' ~ 'Splice or other',
VariantClass == 'Splice_Region' ~ 'Splice or other',
VariantClass == "5'Flank" ~ 'Splice or other',
VariantClass == 'Translation_Start_Site' ~ 'Splice or other',
VariantClass == 'Nonstop_Mutation' ~ 'Splice or other',
VariantClass == 'Missense_Mutation' ~ 'Missense',
VariantClass == 'Nonsense_Mutation' ~ 'Nonsense',
T ~ VariantClass)
) %>%
group_by(Gene) %>% mutate(n_gene = n()) %>% ungroup() %>%
filter(n_gene > 20) %>%
arrange(-n_gene) %>% mutate(Gene = factor(Gene, unique(Gene))) %>%
count(VariantClass, therapy_binary) %>%
group_by(therapy_binary) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total) %>%
ungroup()
# plot
p_vc = ggplot(
D,
aes(x = therapy_binary, y = proportion, fill = VariantClass, label = n)
) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
coord_flip() +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank()
) +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
xlab('') + ylab('') +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white')
#(e) Nucleotide substitution
C = M %>% filter(variant_type == 'SNV') %>%
count(sub_nuc, therapy_binary) %>%
group_by(therapy_binary) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total)
p_nuc = ggplot(
C,
aes(x = therapy_binary, y = proportion, fill = sub_nuc, label = n)
) +
geom_bar(stat = 'identity') + theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank()
) + xlab('') + coord_flip() +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white') +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0))
# grid.arrange(p_nuc, p_type, ncol = 1)
#(f) Mutation type vs treatment type
C = M %>% filter(variant_type %in% c('INS', 'DEL', 'SNV')) %>%
mutate(
treatment_type = case_when(
XRT == 0 & ind_anychemo == 1 ~ "Chemo no XRT",
XRT == 1 ~ "XRT",
XRT == 0 & ind_anychemo == 0 ~ "No treatment",
T ~ "NA")
) %>%
filter((!is.na(treatment_type)) & (treatment_type != 'NA')) %>%
mutate(treatment_type = factor(treatment_type, levels = c('Chemo no XRT', 'XRT', 'No treatment'))) %>%
count(variant_type, treatment_type) %>%
group_by(treatment_type) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total)
p_indel = ggplot(
C,
aes(x = treatment_type, y = proportion, fill = variant_type, label = n)
) +
geom_bar(stat = 'identity') + theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
legend.title = element_blank()
) + xlab('') + coord_flip() +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white') +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0))
#(g) Indel length
C = M %>% filter(variant_type %in% c('DEL', 'INS')) %>%
mutate(
treatment_type = case_when(
XRT == 0 & ind_anychemo == 1 ~ "Chemo no XRT",
XRT == 1 ~ "XRT",
XRT == 0 & ind_anychemo == 0 ~ "No treatment",
T ~ "NA")
) %>%
mutate(treatment_type = factor(treatment_type, levels = c('Chemo no XRT', 'XRT', 'No treatment'))) %>%
mutate(indel_length = as.numeric(End) - as.numeric(Start)) %>%
mutate(
indel_length_bin = cut(
x = indel_length,
breaks = c(-Inf, 1, 5, Inf),
labels = c('1', '2-5', '>5'))
) %>%
count(indel_length_bin, treatment_type) %>%
group_by(treatment_type) %>%
mutate(total = sum(n)) %>%
mutate(proportion = n/total)
p_len = ggplot(
C,
aes(x = treatment_type, y = proportion, fill = indel_length_bin, label = n)
) +
geom_bar(stat = 'identity') + theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black")
) + xlab('') + coord_flip() +
geom_text(size = label_size, position = position_stack(vjust = 0.5), color = 'white') +
guides(fill = guide_legend(title = 'Indel Length (nts)')) +
scale_fill_nejm() + scale_color_nejm() +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0))
panel = ggarrange(p_my, p_pd, p_silent, p_nuc, p_indel, p_len, p_vc,
ncol = 2, nrow = 4, common.legend = F, legend = "top", align = 'hv', labels = 'AUTO')
do_plot(panel, 'supp_fig5.png', w = 11, h = 9, save_pdf = F) #no pdf
#Supplementary Figure 4 Hotspot R882H
D <- M_long %>% filter(CH_nonsilent == 1)
D <- M_long %>% mutate(dnmt3a_status=case_when(
Gene=="DNMT3A" & AAchange=="p.R882H" ~ 'R882H',
Gene=="DNMT3A" & AAchange=="p.R882C" ~ 'R882C',
Gene=="DNMT3A" ~ 'Other'
))
D <- D %>% filter(Gene=="DNMT3A" & therapy_known==1)
p<- ggplot(
D,
aes(x = factor(dnmt3a_status), y = VAF_N, fill = therapy_binary)
) + geom_boxplot(outlier.alpha = 0) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"),
strip.background = element_rect(fill = "white", color = 'white'),
legend.title = element_blank()
) +
geom_point(position = position_jitterdodge(), pch = 21, color = 'black', size = 1) +
xlab("DNMT3A Mutation Amino Acid Change") +
ylab("VAF") +
scale_fill_nejm() + scale_color_nejm()
do_plot(p, 'supp_fig4.png', w = 11, h = 9, save_pdf = F) #no pdf
#Perform Regression
D_nomiss<- na.omit(subset(D, select = c(VAF_N, age, race, smoke_bin, STUDY_ID, dnmt3a_status, therapy_binary)))
geeglm(
data = D_nomiss,
formula = log(VAF_N) ~ age + race + smoke_bin + factor(dnmt3a_status) + factor(therapy_binary),
family = gaussian,
corstr = "exchangeable",
id = STUDY_ID) %>%
get_model_data(type="est")
## term estimate std.error conf.low
## 1 age 0.005228682 0.002136098 0.001042006
## 2 raceAsian -0.054495216 0.099609742 -0.249726722
## 3 raceBlack -0.012852715 0.088305256 -0.185927836
## 4 raceOther -0.207767783 0.085796794 -0.375926410
## 5 raceUnknown 0.168933949 0.133516646 -0.092753868
## 6 smoke_bin1 0.053760814 0.043796021 -0.032077810
## 7 factor(dnmt3a_status)R882C 0.406409808 0.156622791 0.099434779
## 8 factor(dnmt3a_status)R882H 0.592651057 0.108264729 0.380456087
## 9 factor(therapy_binary)treated -0.058264323 0.045263498 -0.146979148
## conf.high statistic den.df p.value p.stars p.label group xpos
## 1 0.009415358 5.99158828 Inf 1.437425e-02 * 0.01 * pos 9
## 2 0.140736290 0.29930442 Inf 5.843188e-01 -0.05 neg 8
## 3 0.160222406 0.02118442 Inf 8.842777e-01 -0.01 neg 7
## 4 -0.039609157 5.86427452 Inf 1.545123e-02 * -0.21 * neg 6
## 5 0.430621765 1.60089571 Inf 2.057763e-01 0.17 pos 5
## 6 0.139599438 1.50682341 Inf 2.196244e-01 0.05 pos 4
## 7 0.713384838 6.73315239 Inf 9.463720e-03 ** 0.41 ** pos 3
## 8 0.804846028 29.96568008 Inf 4.397613e-08 *** 0.59 *** pos 2
## 9 0.030450503 1.65694916 Inf 1.980157e-01 -0.06 neg 1
## xmin xmax
## 1 8.825 9.175
## 2 7.825 8.175
## 3 6.825 7.175
## 4 5.825 6.175
## 5 4.825 5.175
## 6 3.825 4.175
## 7 2.825 3.175
## 8 1.825 2.175
## 9 0.825 1.175
##Supplementary Figure 3
##Heterogenetiy in OR for clinical Characteristics by Gene
#Plotting theme
panel_theme = theme_bw() + theme(
panel.border = element_blank(),
legend.position = "none",
panel.grid.minor = element_blank(),
plot.subtitle = element_text(hjust = 0.5, size = 8),
plot.title = element_text(face = 'bold', hjust = 0, vjust = -2, size = 8),
panel.grid.major = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 6),
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 8),
axis.line = element_line(),
plot.margin = unit(c(0,0,0,0), 'pt')
)
#Age
genelist1 = c("PPM1D","TP53","CHEK2","DNMT3A","TET2","ASXL1","ATM", "SRSF2", "SF3B1", "JAK2")
genelist2 = c("PPM1D","TP53","CHEK2","DNMT3A","TET2","ASXL1","ATM", "SRSF2", "SF3B1", "JAK2")
D <- M_wide
vars = NULL
for (gene1 in genelist1) {
for (gene2 in genelist2) {
if ((M_wide %>% filter(!!as.name(gene1)==1) %>% count(.data[[gene1]]) %>% pull(n)) > (M_wide %>% filter(!!as.name(gene2)==1) %>% count(.data[[gene2]]) %>% pull(n))) {
var=(paste(gene1,gene2,sep="_"))
vars = c(vars,var)
D <- D %>% mutate(!!sym(var) :=case_when(!!as.name(gene1)==1 & !!as.name(gene2)==1 ~ 999,
!!as.name(gene1)==1 ~ 0,
!!as.name(gene2)==1 ~ 1))
}
}
}
D <- D %>% mutate_at(.vars = vars,
.funs = gsub,
pattern = 999,
replacement = NA)
D <- D %>% mutate_at(.vars = vars,
.funs = as.numeric)
logit_gene_var = list()
for (gene in vars) {
logit = glm(
formula = get(gene) ~ age_scaled + smoke_bin + race_b + Gender + therapy_binary,
data = D,
family = binomial(link = "logit"), na.action = 'na.omit')
print(gene)
logit_data = logit %>% sjPlot::get_model_data(type="est") %>% cbind(Gene = gene)
logit_gene_var = rbind(logit_gene_var, logit_data)
}
## [1] "PPM1D_TP53"
## [1] "PPM1D_CHEK2"
## [1] "PPM1D_ASXL1"
## [1] "PPM1D_ATM"
## [1] "PPM1D_SRSF2"
## [1] "PPM1D_SF3B1"
## [1] "PPM1D_JAK2"
## [1] "TP53_CHEK2"
## [1] "TP53_ATM"
## [1] "TP53_SRSF2"
## [1] "TP53_SF3B1"
## [1] "TP53_JAK2"
## [1] "CHEK2_ATM"
## [1] "CHEK2_SRSF2"
## [1] "CHEK2_SF3B1"
## [1] "CHEK2_JAK2"
## [1] "DNMT3A_PPM1D"
## [1] "DNMT3A_TP53"
## [1] "DNMT3A_CHEK2"
## [1] "DNMT3A_TET2"
## [1] "DNMT3A_ASXL1"
## [1] "DNMT3A_ATM"
## [1] "DNMT3A_SRSF2"
## [1] "DNMT3A_SF3B1"
## [1] "DNMT3A_JAK2"
## [1] "TET2_PPM1D"
## [1] "TET2_TP53"
## [1] "TET2_CHEK2"
## [1] "TET2_ASXL1"
## [1] "TET2_ATM"
## [1] "TET2_SRSF2"
## [1] "TET2_SF3B1"
## [1] "TET2_JAK2"
## [1] "ASXL1_TP53"
## [1] "ASXL1_CHEK2"
## [1] "ASXL1_ATM"
## [1] "ASXL1_SRSF2"
## [1] "ASXL1_SF3B1"
## [1] "ASXL1_JAK2"
## [1] "ATM_SRSF2"
## [1] "ATM_SF3B1"
## [1] "ATM_JAK2"
## [1] "SF3B1_SRSF2"
## [1] "JAK2_SRSF2"
## [1] "JAK2_SF3B1"
logit_gene_var <- logit_gene_var %>% mutate(p_fdr = p.adjust(p.value, method = "fdr")) %>%
mutate(log_or = log(estimate)) %>% separate(Gene, c("Gene1", "Gene2"))
#AGE
D <- logit_gene_var %>% filter(term=="age_scaled")
age = ggplot(
D,
aes(x = Gene2, y = Gene1, fill = log_or)
) +
geom_tile(color = "lightgrey") +
scale_fill_gradient2(
low = "darkblue",
mid = "white",
high = "darkred",
midpoint = 0,
na.value = "white",
limits = c(-2.5, 2.5)
) +
geom_point(
aes(size = ifelse(p_fdr <= 0.05, 1, NA)),
shape = 8,
colour = "black",
show.legend = F
) +
scale_size_area(max_size = 2) +
xlab('Alternate Gene') +
ylab('Reference Gene') +
# title("Age") +
panel_theme +
scale_y_discrete(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
ggtitle('Age') +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_blank(),
legend.position = 'right',
axis.text.x = element_text(size = 6, angle = 30, hjust = 1),
axis.text.y = element_text(size = 6),
plot.margin = unit(c(8, 0, 8, 0), 'pt'),
legend.text = element_text(size = 8),
legend.title = element_text(size = 8),
legend.key.size = unit(8, "pt")
)
#Therapy
D <- logit_gene_var %>% filter(term=="therapy_binarytreated")
therapy = ggplot(
D,
aes(x = Gene2, y = Gene1, fill = log_or)
) +
geom_tile(color = "lightgrey") +
scale_fill_gradient2(
low = "darkblue",
mid = "white",
high = "darkred",
midpoint = 0,
na.value = "white",
limits = c(-2.5, 2.5)
) +
geom_point(
aes(size = ifelse(p_fdr <= 0.05, 1, NA)),
shape = 8,
colour = "black",
show.legend = F
) +
scale_size_area(max_size = 2) +
xlab('Alternate Gene') +
ylab('Reference Gene') +
ggtitle('Therapy') +
panel_theme +
scale_y_discrete(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_blank(),
legend.position = 'right',
axis.text.x = element_text(size = 6, angle = 30, hjust = 1),
axis.text.y = element_text(size = 6),
plot.margin = unit(c(8, 0, 8, 0), 'pt'),
legend.text = element_text(size = 8),
legend.title = element_text(size =8),
legend.key.size = unit(8, "pt")
)
#Smoking
D <- logit_gene_var %>% filter(term=="smoke_bin1")
smoking = ggplot(
D,
aes(x = Gene2, y = Gene1, fill = log_or)
) +
geom_tile(color = "lightgrey") +
scale_fill_gradient2(
low = "darkblue",
mid = "white",
high = "darkred",
midpoint = 0,
na.value = "white",
limits = c(-2.5, 2.5)
) +
geom_point(
aes(size = ifelse(p_fdr <= 0.05, 1, NA)),
shape = 8,
colour = "black",
show.legend = F
) +
scale_size_area(max_size = 2) +
xlab('Alternate Gene') +
ylab('Reference Gene') +
ggtitle('Smoking') +
panel_theme +
scale_y_discrete(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_blank(),
legend.position = 'right',
axis.text.x = element_text(size = 6, angle = 30, hjust = 1),
axis.text.y = element_text(size = 6),
plot.margin = unit(c(8, 0, 8, 0), 'pt'),
legend.text = element_text(size = 8),
legend.title = element_text(size = 8),
legend.key.size = unit(8, "pt")
)
combo <- age + therapy + smoking +
guide_area() +
plot_layout(guides = 'collect')
do_plot(combo, "SuppFig3.png", w = 5, h = 6, save_pdf = F) #no pdf
## Warning: Removed 39 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 42 rows containing missing values (geom_point).
## Chemo and XRT by time tx CH-PD (Figure not included)
panel_theme = theme_bw() + theme(
panel.border = element_blank(),
legend.position = "none",
panel.grid.minor = element_blank(),
plot.subtitle = element_text(hjust = 0.5, size = 4),
plot.title = element_text(face = 'bold', hjust = 0, vjust = -2, size = 4),
panel.grid.major = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title = element_text(size = 12),
axis.line = element_line(),
plot.margin = unit(c(0,0,0,0), 'pt')
)
# Compute buckets of chemo times and limit to people who got cytotoxic therapy or were untreated without radiation therapy
D <- M_wide %>% filter(XRT==0 & ind_radiotherapy==0) %>%
mutate(time_cytotoxic_therapy_r=
cut(time_cytotoxic_therapy,
breaks=c(Inf,1800,720,365,180,0),
labels = c(1, 2, 3, 4, 5)))
time_chemo_steps <- D$time_cytotoxic_therapy_r %>% unique() %>% sort
logit_time_var = list()
for (t in time_chemo_steps) {
D_step <- D %>% filter(time_cytotoxic_therapy_r==t | ind_cytotoxic_therapy==0)
logit <- glm(formula = ch_pancan_pd ~ pct_cytotoxic_therapy + age_scaled + smoke_bin + race_b,
data = D_step,
family = binomial(link="logit"), na.action = 'na.omit')
logit_data <- logit %>% sjPlot::get_model_data(type="est") %>% cbind(Time = t) %>% cbind(N=nrow(filter(D_step,time_cytotoxic_therapy_r==t)))
logit_time_var = rbind(logit_time_var, logit_data)
}
C <- logit_time_var %>% filter(!(term %in% c("age_scaled", "smoke_bin1", "race_b"))) %>%
mutate(Time_cat = case_when(
Time == 1 ~ "Less than 6 months",
Time == 2 ~ "6-12 months",
Time == 3 ~ "1-2 years",
Time == 4 ~ "2-5 years",
Time == 5 ~ "5 or more years")
) %>%
mutate(Time_cat = paste(Time_cat,"(N=",N,")")) %>%
mutate(Time=as.numeric(Time)) %>%
mutate(Time_cat = factor(Time_cat, levels=unique(Time_cat[order(-Time)])))
p1 = C %>%
plot_forest(
x = "Time_cat",
#label = "p.stars",
eb_w = 0,
eb_s = 2,
ps = 2,
or_s = 5,
nudge = -0.5
) +
scale_x_discrete(expand = c(0,1)) +
ylab("OR of CH-PD for Cytotoxic Therapy") +
xlab("Time from End Of cytotoxic Therapy") +
panel_theme +
theme(plot.margin = unit(c(0,10,0,0), 'pt')) +
ggtitle("A")
#test of heterogeneity among those with chemo
#Is effect of time significant in those who got cytotoxic therapy?
logit <- D %>% filter(ind_cytotoxic_therapy==1) %>%
glm(formula = ch_pancan_pd ~ pct_cytotoxic_therapy + as.factor(time_cytotoxic_therapy_r) + age + smoke_bin + race_b + timedx_impact, family = binomial(link="logit"), na.action = 'na.omit')
tab_model(logit)
| Â | ch_pancan_pd | ||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.00 | 0.00 – 0.00 | <0.001 |
| pct_cytotoxic_therapy | 1.17 | 1.01 – 1.35 | 0.036 |
|
time_cytotoxic_therapy_r [2] |
1.11 | 0.79 – 1.53 | 0.551 |
|
time_cytotoxic_therapy_r [3] |
1.00 | 0.71 – 1.40 | 0.994 |
|
time_cytotoxic_therapy_r [4] |
1.14 | 0.79 – 1.63 | 0.485 |
|
time_cytotoxic_therapy_r [5] |
1.27 | 0.75 – 2.17 | 0.372 |
| age | 1.07 | 1.06 – 1.08 | <0.001 |
| smoke_bin [1] | 1.09 | 0.89 – 1.34 | 0.401 |
| race_b | 1.02 | 0.80 – 1.33 | 0.849 |
| timedx_impact | 1.00 | 1.00 – 1.00 | 0.175 |
| Observations | 2876 | ||
| R2 Tjur | 0.090 | ||
# Compute buckets of XRT times and limit to people who got XRT wihtout radioisotope therapy or chemotherapy
D <- M_wide %>% filter(ind_cytotoxic_therapy==0 & ind_radiotherapy==0) %>%
mutate(time_end_xrt_r=
cut(time_end_xrt,
breaks=c(Inf,720,180,0),
labels = c(1, 2, 3)))
time_xrt_steps <- D$time_end_xrt_r %>% unique() %>% sort
logit_time_var = list()
for (t in time_xrt_steps) {
D_step <- D %>% filter(time_end_xrt_r==t | eqd_3_t==0)
logit <- glm(formula = ch_pancan_pd ~ eqd_3_t + age_scaled + smoke_bin + race_b,
data = D_step,
family = binomial(link="logit"), na.action = 'na.omit')
logit_data <- logit %>% sjPlot::get_model_data(type="est") %>% cbind(Time = t) %>% cbind(N=nrow(filter(D_step,time_end_xrt_r==t)))
logit_time_var = rbind(logit_time_var, logit_data)
}
C <- logit_time_var %>% filter(!(term %in% c("age_scaled", "smoke_bin1", "race_b"))) %>%
mutate(Time_cat = case_when(
Time == 1 ~ "Less than 6 months",
Time == 2 ~ "6 months-2 years",
Time == 3 ~ "2 or more years")
) %>%
mutate(Time_cat = paste(Time_cat,"(N=",N,")")) %>%
mutate(Time=as.numeric(Time)) %>%
mutate(Time_cat = factor(Time_cat, levels=unique(Time_cat[order(-Time)])))
p2 = C %>%
plot_forest(
x = "Time_cat",
#label = "p.stars",
eb_w = 0,
eb_s = 2,
ps = 2,
or_s = 5,
nudge = -0.5
) +
scale_x_discrete(expand = c(0.025, 0)) +
ylab("OR of CH-PD with EQD2") +
xlab("Time from End Of External Beam Radiation Therapy") +
panel_theme +
theme(plot.margin = unit(c(0,0,0,10), 'pt')) +
ggtitle("B")
#test of heterogeneity among people who got XRT
#Is effect of time significant in those who got XRT?
logit <- D %>% filter(XRT==1) %>%
glm(formula = ch_pancan_pd ~ eqd_3_t + time_end_xrt_r + age + smoke_bin + race_b, family = binomial(link="logit"),na.action = 'na.omit')
tab_model(logit)
| Â | ch_pancan_pd | ||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.00 | 0.00 – 0.01 | <0.001 |
| eqd_3_t | 1.19 | 0.83 – 1.73 | 0.348 |
| time_end_xrt_r [2] | 0.98 | 0.52 – 1.86 | 0.959 |
| time_end_xrt_r [3] | 0.90 | 0.49 – 1.65 | 0.737 |
| age | 1.06 | 1.03 – 1.09 | <0.001 |
| smoke_bin [1] | 1.13 | 0.66 – 1.94 | 0.650 |
| race_b | 1.67 | 0.77 – 4.06 | 0.219 |
| Observations | 397 | ||
| R2 Tjur | 0.083 | ||
panel_theme = theme_bw() + theme(
panel.border = element_blank(),
legend.position = "none",
# panel.grid.minor = element_blank(),
plot.subtitle = element_text(hjust = 0.5, size = 8),
plot.title = element_text(face = 'bold', hjust = 0, vjust = -2, size = 12),
panel.grid.major = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12, angle = 30, hjust = 1),
axis.title = element_text(size = 12),
axis.line = element_line(),
plot.margin = unit(c(0,0,0,0), 'pt')
)
#Chemo
D <- M_long %>% filter(pct_cytotoxic_therapy!=0 & !is.na(time_cytotoxic_therapy)) %>% filter(XRT==0 & ind_radiotherapy==0 & therapy_known==1) %>%
mutate(Time=
cut(time_cytotoxic_therapy,
breaks=c(Inf,1800,720,365,180,-1),
labels = c(1, 2, 3, 4, 5))) %>%
mutate(Time_cat = case_when(
Time == 1 ~ "Less than 6 months",
Time == 2 ~ "6-12 months",
Time == 3 ~ "1-2 years",
Time == 4 ~ "2-5 years",
Time == 5 ~ "5 or more years")
) %>%
mutate(Time=as.numeric(Time)) %>%
mutate(Time_cat = factor(Time_cat, levels=unique(Time_cat[order(Time)])))
model_ch = geeglm(
data = D,
formula = log(VAF_N) ~ age_scaled + race + smoke_bin + time_cytotoxic_therapy + pct_cytotoxic_therapy,
family = gaussian,
corstr = "exchangeable",
id = STUDY_ID)
model_ch %>% summary
##
## Call:
## geeglm(formula = log(VAF_N) ~ age_scaled + race + smoke_bin +
## time_cytotoxic_therapy + pct_cytotoxic_therapy, family = gaussian,
## data = D, id = STUDY_ID, corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -2.963e+00 5.678e-02 2723.206 < 2e-16 ***
## age_scaled 6.178e-02 2.051e-02 9.076 0.00259 **
## raceAsian -4.071e-02 9.036e-02 0.203 0.65235
## raceBlack -2.014e-01 8.765e-02 5.278 0.02160 *
## raceOther -8.163e-02 8.026e-02 1.035 0.30910
## raceUnknown -3.602e-02 1.110e-01 0.105 0.74555
## smoke_bin1 6.743e-02 4.135e-02 2.660 0.10293
## time_cytotoxic_therapy 2.377e-05 1.909e-05 1.551 0.21293
## pct_cytotoxic_therapy -2.128e-02 2.622e-02 0.659 0.41706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = exchangeable
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.5431 0.0212
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.3597 0.05495
## Number of clusters: 1305 Maximum cluster size: 3
p3 <- ggplot(
D,
aes(y = VAF_N, x = Time_cat, fill = Time_cat)
) +
geom_boxplot(outlier.alpha = 0) +
geom_point(position = position_jitterdodge(), pch = 21, color = 'black', size = 1) +
panel_theme + xlab("Time from End Of cytotoxic Therapy") + ylab('VAF') +
scale_y_continuous(expand = c(0.1, 0))
#XRT
D <- M_long %>% filter(eqd_3_t!=0 & !is.na(time_end_xrt)) %>% filter(ind_cytotoxic_therapy==0 & ind_radiotherapy==0 & therapy_known==1) %>%
mutate(Time=
cut(time_end_xrt,
breaks=c(Inf,720,180,-1),
labels = c(1, 2, 3))) %>%
mutate(Time_cat = case_when(
Time == 1 ~ "Less than 6 months",
Time == 2 ~ "6 months-2 years",
Time == 3 ~ "2 or more years")
) %>%
mutate(Time=as.numeric(Time)) %>%
mutate(Time_cat = factor(Time_cat, levels=unique(Time_cat[order(Time)])))
model_ch = geeglm(
data = D,
formula = log(VAF_N) ~ age_scaled + race + smoke_bin + time_end_xrt + eqd_3_t,
family = gaussian,
corstr = "exchangeable",
id = STUDY_ID)
model_ch %>% summary
##
## Call:
## geeglm(formula = log(VAF_N) ~ age_scaled + race + smoke_bin +
## time_end_xrt + eqd_3_t, family = gaussian, data = D, id = STUDY_ID,
## corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -2.94e+00 1.60e-01 337.01 < 2e-16 ***
## age_scaled 7.57e-02 5.15e-02 2.16 0.142
## raceAsian 1.02e-01 2.25e-01 0.21 0.650
## raceBlack -5.04e-01 3.34e-01 2.28 0.131
## raceOther -7.81e-01 1.23e-01 40.19 2.3e-10 ***
## raceUnknown -3.41e-01 1.50e-01 5.18 0.023 *
## smoke_bin1 -9.46e-03 1.03e-01 0.01 0.927
## time_end_xrt 2.75e-05 4.14e-05 0.44 0.507
## eqd_3_t 7.96e-02 6.22e-02 1.64 0.201
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = exchangeable
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.658 0.041
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.405 0.0877
## Number of clusters: 294 Maximum cluster size: 2
p4 <- ggplot(
D,
aes(y = VAF_N, x = Time_cat, fill = Time_cat)
) +
geom_boxplot(outlier.alpha = 0) +
geom_point(position = position_jitterdodge(), pch = 21, color = 'black', size = 1) +
panel_theme + xlab("Time from End of External Beam Radiation Therapy") + ylab('VAF') +
scale_y_continuous(expand = c(0.1, 0))
combo <- p1 + p2 + p3 + p4
do_plot(combo, 'sfigx.png', w = 10, h = 4, save_pdf = F) #no pdf
drug_sets =
get_chemo_column(
class_dict,
drugsets_dict,
class = 'sets',
prefix = 'ind'
) %>%
{.[. %in% colnames(M_wide)]} %>%
paste(collapse = ' + ')
M_wide = M_wide %>% mutate(sumSets = rowSums(select(., contains("ind_ds"))))
#ind multivariate
formula = paste0('ch_pancan_pd ~ ', drug_sets, ' + XRT + age + smoke_bin + race_b + timedx_impact')
D = M_wide %>% glm(formula = as.formula(formula), family = binomial(link="logit"), na.action = 'na.omit') %>%
sjPlot::get_model_data(type = "est",model = .) %>%
cbind(Group = "multi") %>%
mutate(Drug = term) %>%
filter(!(term %in% c("age", "smoke_bin1", "race_b", "timedx_impact", "XRT"))) %>%
mutate(Drug = case_when(
Drug == "ind_ds_carboplatin" ~ "Carboplatin",
Drug == "ind_ds_irino_oxali" ~ "Irinotecan Oxaliplatin",
Drug == "ind_ds_docetaxel" ~ "Docetaxel",
Drug == "ind_ds_irinotecan" ~ "Irinotecan",
Drug == "ind_ds_cis_gem" ~ "Cisplatin Gemcitabine",
Drug == "ind_ds_gemcitabine" ~ "Gemcitabine",
Drug == "ind_ds_cisplatin" ~ "Cisplatin",
Drug == "ind_ds_floxuridine" ~ "Floxuridine",
Drug == "ind_ds_doxorubicin" ~ "Doxorubicin",
Drug == "ind_ds_cyclo_doxo" ~ "Cyclophosphamide Doxorubicin",
Drug == "ind_ds_oxaliplatin" ~ "Oxaliplatin",
Drug == "ind_ds_abr_gem" ~ "Abraxane Gemcitabine",
Drug == "ind_ds_paclitaxel" ~ "Paclitaxel",
Drug == "ind_ds_fluo_oxali" ~ "Fluorouracil Oxaliplatin",
Drug == "ind_ds_carbo_doxo" ~ "Carboplatin Doxorubicin",
Drug == "ind_ds_carbo_pem" ~ "Carboplatin Pemetrexed",
Drug == "ind_ds_carbo_pacli" ~ "Carboplatin Paclitaxel",
Drug == "ind_ds_cis_pem" ~ "Cisplatin Pemetrexed",
Drug == "ind_ds_carbo_gem" ~ "Carboplatin Gemcitabine",
Drug == "ind_ds_cis_etop" ~ "Cisplatin Etoposide",
Drug == "ind_ds_cytotoxic_other" ~ "Other Cytotoxic Drugset")
) %>%
mutate(
Drug = factor(Drug, levels=unique(Drug[order(estimate)]))
)
drug_sets_list=
get_chemo_column(
class_dict,
drugsets_dict,
class = 'sets',
prefix = 'ind')
p = D %>%
plot_forest(
x = "Drug",
label = "p.stars",
eb_w = 0,
eb_s = 1,
ps = 2,
or_s = 3.1,
nudge = -0.5
) +
scale_x_discrete(expand = c(0,1)) +
ylab("OR of CH-PD") +
xlab("Regimen") +
scale_fill_nejm() +
scale_color_nejm()
do_plot(p, 'efig4.png', w = 6, h = 4, save_pdf = F) #no pdf
# Tabulate serial
#Tabulate serial
print('All patients')
## [1] "All patients"
nrow(P_serial)
## [1] 525
print('All mutations')
## [1] "All mutations"
nrow(M2_all)
## [1] 621
print('Therapy Binary')
## [1] "Therapy Binary"
table(P_serial$therapy_binary)
##
## 0 1
## 209 316
print('# of Mutation Positive Patients')
## [1] "# of Mutation Positive Patients"
unique(M2_all$STUDY_ID) %>% length()
## [1] 394
print('Describe FU')
## [1] "Describe FU"
psych::describe(P_serial$delta_time)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 525 734 236 679 718 169 186 1602 1416 0.7 0.75 10.3
print('# of Patients who got treatment')
## [1] "# of Patients who got treatment"
sum(P_serial$therapy_binary)
## [1] 316
print('# of Patients who got no cyto/xrt')
## [1] "# of Patients who got no cyto/xrt"
filter(P_serial,therapy_binary==0) %>% nrow()
## [1] 209
print('Number of Patients with CH at inital timepoint')
## [1] "Number of Patients with CH at inital timepoint"
det <- filter(M2_all,VAF_1>0)
unique(det$STUDY_ID) %>% length()
## [1] 389
print('Number of mutations detected at both timepoints')
## [1] "Number of mutations detected at both timepoints"
filter(M2_all,VAF_1>0 & VAF_2>0) %>% nrow()
## [1] 590
print('Number of mutations detected at only one timepoint')
## [1] "Number of mutations detected at only one timepoint"
nrow(filter(M2_all,VAF_1==0 | VAF_2==0))
## [1] 31
print('Number of mutations detected at first timepoint only')
## [1] "Number of mutations detected at first timepoint only"
nrow(filter(M2_all,VAF_1>0 & VAF_2==0))
## [1] 10
print('Number of mutations detected at second timepoint only')
## [1] "Number of mutations detected at second timepoint only"
nrow(filter(M2_all,VAF_1==0 & VAF_2>0))
## [1] 21
print('Number of newly detected mutations after tx')
## [1] "Number of newly detected mutations after tx"
new_tx <- filter(M2_all,VAF_1==0 & VAF_2>=0.02 & therapy_binary==1)
unique(new_tx$STUDY_ID) %>% length()
## [1] 13
print('Number of newly detected mutations after no tx')
## [1] "Number of newly detected mutations after no tx"
new_tx <- filter(M2_all,VAF_1==0 & VAF_2>0 & therapy_binary==0)
unique(new_tx$STUDY_ID) %>% length()
## [1] 2
print('Table of direction')
## [1] "Table of direction"
table(M2$direction)
##
## DOWN STABLE UP
## 59 367 164
p = ggplot(
M2 %>%
group_by(STUDY_ID) %>%
mutate(n_mut = n()) %>%
mutate(any_ddr_CH = ifelse(any(ddr_CH), 'DDR', 'Other')) %>%
ungroup() %>%
filter(n_mut <= 4) %>%
mutate(therapy = recode(therapy_binary, `1` = 'treated', `0` = 'untreated')) %>%
mutate(therapy = factor(therapy, levels = c('untreated', 'treated'))) %>%
{rbind(
.,
mutate(., any_ddr_CH = 'All')
)},
aes(x = n_mut, y = log(growth_rate), group = factor(n_mut))) +
geom_boxplot(outlier.alpha = 0, color = 'black', fill = 'steelblue') +
geom_jitter(
pch = 21,
size = 1,
alpha = 0.8,
width = 0.1,
color = 'black',
fill = 'gray'
) +
panel_theme +
theme(
legend.position = "none",
strip.background = element_rect(fill = 'grey', size = 0),
panel.border = element_rect(color = "grey", fill = NA, size = 1),
strip.text = element_text(size = 10)
) +
facet_grid(vars(any_ddr_CH), vars(therapy)) +
# scale_fill_nejm() + scale_color_nejm() +
xlab('number of mutations') +
ylab('log growth rate')
do_plot(p, 'supp8.png', w = 6, h = 6, save_pdf = F) #no pdf
D = P_serial %>% left_join(
M2_all %>% group_by(STUDY_ID) %>%
summarise(
ch_t1 = any(VAF_1 >= 0.02),
ch_t2 = any(VAF_2 >= 0.02),
ddr_CH_t1 = any(ddr_CH & VAF_1 >= 0.02),
ddr_CH_t2 = any(ddr_CH & VAF_2 >= 0.02),
non_ddr_CH_t1 = any(!ddr_CH & VAF_1 >= 0.02),
non_ddr_CH_t2 = any(!ddr_CH & VAF_2 >= 0.02),
ddr_CH_gain = any(ddr_CH & VAF_1 != 0 & VAF_1 < 0.02 & VAF_2 >= 0.02),
other_gain = any(!ddr_CH & VAF_1 != 0 & VAF_1 < 0.02 & VAF_2 >= 0.02),
ddr_CH_loss = any(ddr_CH & VAF_1 >= 0.02 & VAF_2 < 0.02),
ddr_CH_loss_complete = any(ddr_CH & VAF_1 >= 0.02 & VAF_2 == 0),
any_loss_complete = any(VAF_1 >= 0.02 & VAF_2 == 0),
other_loss = any(!ddr_CH & VAF_1 >= 0.02 & VAF_2 < 0.02),
other_loss_complete = any(!ddr_CH & VAF_1 >= 0.02 & VAF_2 == 0),
ddr_CH_denovo = any(ddr_CH & VAF_1 == 0 & VAF_2 >= 0.02),
other_denovo = any(!ddr_CH & VAF_1 == 0 & VAF_2 >= 0.02),
any_denovo = any(VAF_1 == 0 & VAF_2 >= 0.02)
),
by = 'STUDY_ID',
) %>%
mutate_at(
c('ch_t1', 'ch_t2', 'ddr_CH_t1', 'ddr_CH_t2',
'non_ddr_CH_t1', 'non_ddr_CH_t2',
'ddr_CH_gain', 'other_gain', 'ddr_CH_loss', 'other_loss',
'ddr_CH_denovo', 'other_denovo'),
function(x){tidyr::replace_na(x, FALSE)}
)
D_loss = D %>%
mutate(
ddr_CH = case_when(
ddr_CH_loss_complete ~ 'complete loss',
# ddr_CH_loss ~ 'loss',
T ~ 'none'
),
other = case_when(
other_loss_complete ~ 'complete loss',
# other_loss ~ 'loss',
T ~ 'none'
),
any = case_when(
any_loss_complete ~ 'complete loss',
# other_loss ~ 'loss',
T ~ 'none'
)
) %>%
melt(measure.vars = c('ddr_CH', 'other', 'any'),
variable.name = 'gene',
value.name = 'event_type') %>%
count(therapy_binary, gene, event_type) %>%
group_by(therapy_binary, gene) %>%
mutate(total = sum(n), prop = n/total)
D_gain = D %>% mutate(
ddr_CH = case_when(
ddr_CH_denovo ~ 'newly observed',
T ~ 'none'
),
other = case_when(
other_denovo ~ 'newly observed',
T ~ 'none'
),
any = case_when(
any_denovo ~ 'newly observed',
T ~ 'none'
)
) %>%
melt(measure.vars = c('ddr_CH', 'other', 'any'),
variable.name = 'gene',
value.name = 'event_type') %>%
count(therapy_binary, gene, event_type) %>%
group_by(therapy_binary, gene) %>%
mutate(total = sum(n), prop = n/total)
D2 = rbind(
D_gain %>% mutate(event = 'gain'),
D_loss %>% mutate(event = 'loss')
) %>%
filter(event_type != 'none') %>%
mutate(event_type = factor(event_type, c('gain', 'newly observed', 'loss', 'complete loss'))) %>%
ungroup() %>%
mutate(therapy_binary = ifelse(therapy_binary == 1, 'treated', 'untreated')) %>%
mutate(gene = case_when(gene == 'ddr_CH' ~ 'DDR',
gene == 'other' ~ 'Other',
gene == 'any' ~ 'Any'))
D2 <- filter(D2,gene=="Any")
p = ggplot(
D2,
aes(x = therapy_binary,
y = prop,
label = n)
) +
geom_bar(stat = 'identity', color = 'black', size = 0.2) +
geom_text(
size = 3,
color = 'white',
position = position_stack(vjust = 0.5)
) +
facet_grid(~event_type) +
theme(
# strip.background = element_rect(fill = 'white', color = 'black'),
legend.title = element_blank()
) +
xlab('') +
ylab('proportion of patients') +
scale_fill_nejm()
do_plot(p, "supp_fig9.png", w = 6, h = 4, save_pdf = F) #no pdf
D2 %>% filter(event_type=="newly observed") %>%
group_by(event_type, therapy_binary) %>%
summarise(n = sum(n), total = sum(unique(total))) %>%
{prop.test(.$n, .$total)}
##
## 2-sample test for equality of proportions with continuity correction
##
## data: .$n out of .$total
## X-squared = 3, df = 1, p-value = 0.06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.00203 0.06111
## sample estimates:
## prop 1 prop 2
## 0.04114 0.00957
D2 %>% filter(event_type=="complete loss") %>%
group_by(event_type, therapy_binary) %>%
summarise(n = sum(n), total = sum(unique(total))) %>%
{prop.test(.$n, .$total)}
## Warning in prop.test(.$n, .$total): Chi-squared approximation may be incorrect
##
## 2-sample test for equality of proportions with continuity correction
##
## data: .$n out of .$total
## X-squared = 0.9, df = 1, p-value = 0.4
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.0373 0.0117
## sample estimates:
## prop 1 prop 2
## 0.00633 0.01914
D <- M2 %>% filter(ddr_CH==1 & ind_cytotoxic_therapy1==1 & XRT1==0)
p3 <- ggplot(D, aes(x=time_end_cyto1, y=delta_vaf)) +
geom_point() +
geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
ylab('Change in VAF') +
xlab('Number of days since end of cytotoxic therapy')
p <- D %>% lm(formula = delta_vaf ~ time_end_cyto1 + pct_cytotoxic_therapy1, data = .) %>% tab_model()
do_plot(p3, 'time_serial.png', w = 6, h = 6, save_pdf = F) #no pdf
d <- M_tmn_wide_st %>% mutate(
diagnosis=ifelse(post_tmn==1,"tMN","No tMN"))
# Factor the basic variables that
# we're interested in
d$diagnosis <-
factor(d$diagnosis,
levels=c("No tMN", "tMN"))
d$Gender <- factor(d$Gender,
levels=c("F", "M"),
labels=c("Female","Male"))
d$VAF_nonsilent_r <-
factor(d$VAF_nonsilent_r,
labels=c("Negative", # Reference
"2-5%",
"5-10%",
"10-20%",
">20%"))
label(d$VAF_nonsilent_r) <- "Maximum VAF CH-myeloid PD"
d$n_mut_r <-
factor(d$n_mut_r,
labels=c("Negative", # Reference
"1",
"2",
"3 or more"))
label(d$n_mut_r) <- "Number of CH-myeloid PD mutations"
d$center <-
factor(d$center,
levels=c("MSK", "MOF", "MDA", "HCC"),
labels=c("MSK", # Reference
"MOF",
"MDA",
"DFC"))
label(d$age) <- "Age"
label(d$yearslastfu) <- "Years of follow-up"
label(d$hgb) <- "Hemoglobin"
label(d$wbc) <- "White Blood Cell Count"
label(d$plt) <- "Platelet"
label(d$anc) <- "Absolute Neutrophil Count"
label(d$mcv) <- "Mean Corpuscular Volume"
label(d$rdw) <- "Red Cell Distribution Width"
d <- d %>% mutate(generaltumortype=ifelse(generaltumortype=="","Other",generaltumortype))
label(d$generaltumortype) <- "Primary Tumor Subtype"
p = table1(~ Gender + age + generaltumortype + yearslastfu + hgb + rdw + mcv + wbc + anc + plt + n_mut_r + VAF_nonsilent_r | center*diagnosis, data=d, overall=F, output="markdown", export="ktab")
p
MSK |
MOF |
MDA |
DFC |
|||||
|---|---|---|---|---|---|---|---|---|
| No tMN (N=8871) |
tMN (N=30) |
No tMN (N=54) |
tMN (N=13) |
No tMN (N=54) |
tMN (N=14) |
No tMN (N=383) |
tMN (N=18) |
|
| Gender | ||||||||
| Female | 4919 (55.5%) | 12 (40.0%) | 23 (42.6%) | 6 (46.2%) | 25 (46.3%) | 3 (21.4%) | 143 (37.3%) | 6 (33.3%) |
| Male | 3952 (44.5%) | 18 (60.0%) | 31 (57.4%) | 7 (53.8%) | 29 (53.7%) | 11 (78.6%) | 240 (62.7%) | 12 (66.7%) |
| Age | ||||||||
| Mean (SD) | 58.5 (15.0) | 54.2 (22.4) | 72.5 (5.77) | 72.4 (4.61) | 56.4 (9.91) | 56.4 (13.6) | 55.9 (10.7) | 57.7 (8.46) |
| Median [Min, Max] | 60.6 [0.249, 98.7] | 63.2 [9.82, 80.7] | 73.0 [61.0, 87.0] | 73.0 [64.0, 79.0] | 57.8 [40.1, 79.2] | 62.5 [25.0, 74.0] | 58.0 [19.0, 77.0] | 60.5 [34.0, 67.0] |
| Primary Tumor Subtype | ||||||||
| Adrenocortical Carcinoma | 10 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Ampullary Carcinoma | 26 (0.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Anal Cancer | 23 (0.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Appendiceal Cancer | 76 (0.9%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Biliary Cancer | 244 (2.8%) | 2 (6.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Bladder Cancer | 191 (2.2%) | 2 (6.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Breast Carcinoma | 1259 (14.2%) | 3 (10.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Breast Sarcoma | 7 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Cancer of Unknown Primary | 287 (3.2%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Cervical Cancer | 43 (0.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Chondrosarcoma | 10 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Chordoma | 7 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Colorectal Cancer | 1030 (11.6%) | 2 (6.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Embryonal Tumor | 54 (0.6%) | 2 (6.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Endometrial Cancer | 334 (3.8%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Ependymomal Tumor | 7 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Esophagogastric Carcinoma | 433 (4.9%) | 2 (6.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Ewing Sarcoma | 45 (0.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Gastrointestinal Neuroendocrine Tumor | 25 (0.3%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Gastrointestinal Stromal Tumor | 12 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Germ Cell Tumor | 151 (1.7%) | 3 (10.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Gestational Trophoblastic Disease | 5 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Glioma | 403 (4.5%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Head and Neck Carcinoma | 175 (2.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Hepatocellular Carcinoma | 30 (0.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Melanoma | 94 (1.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Meningothelial Tumor | 7 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Mesothelioma | 101 (1.1%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Miscellaneous Brain Tumor | 4 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Miscellaneous Neuroepithelial Tumor | 2 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Nerve Sheath Tumor | 11 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Non-Small Cell Lung Cancer | 1495 (16.9%) | 3 (10.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Osteosarcoma | 56 (0.6%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Ovarian Cancer | 344 (3.9%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Pancreatic Cancer | 709 (8.0%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Penile Cancer | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Pheochromocytoma | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Prostate Cancer | 368 (4.1%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Renal Cell Carcinoma | 76 (0.9%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Retinoblastoma | 6 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Salivary Carcinoma | 42 (0.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Sellar Tumor | 3 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Sex Cord Stromal Tumor | 4 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Skin Cancer, Non-Melanoma | 42 (0.5%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Small Bowel Cancer | 44 (0.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Small Cell Lung Cancer | 112 (1.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Soft Tissue Sarcoma | 272 (3.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Thymic Tumor | 13 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Thyroid Cancer | 89 (1.0%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Uterine Sarcoma | 55 (0.6%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Vaginal Cancer | 5 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Wilms Tumor | 8 (0.1%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Other | 0 (0%) | 0 (0%) | 54 (100%) | 13 (100%) | 54 (100%) | 14 (100%) | 383 (100%) | 18 (100%) |
| Missing | 20 (0.2%) | 1 (3.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Years of follow-up | ||||||||
| Mean (SD) | 1.38 (0.963) | 1.42 (0.843) | 3.60 (2.80) | 2.51 (2.04) | 5.00 (0) | 3.57 (2.10) | 5.19 (2.91) | 5.64 (2.77) |
| Median [Min, Max] | 1.17 [0, 4.49] | 1.34 [0.287, 3.31] | 2.80 [0.164, 10.0] | 1.66 [0.378, 7.40] | 5.00 [5.00, 5.00] | 3.00 [1.00, 8.00] | 5.53 [0.0740, 11.9] | 6.45 [1.04, 12.1] |
| Hemoglobin | ||||||||
| Mean (SD) | 12.3 (1.87) | 11.4 (1.51) | 12.7 (1.52) | 12.2 (2.28) | 12.8 (2.07) | 12.2 (1.81) | 11.9 (1.65) | 11.4 (2.23) |
| Median [Min, Max] | 12.4 [5.30, 19.0] | 11.6 [7.30, 15.0] | 12.7 [9.60, 16.0] | 12.3 [8.20, 17.4] | 13.1 [4.10, 16.3] | 12.0 [9.00, 15.3] | 11.8 [7.90, 16.8] | 10.9 [8.40, 15.6] |
| Missing | 16 (0.2%) | 2 (6.7%) | 2 (3.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 35 (9.1%) | 0 (0%) |
| Red Cell Distribution Width | ||||||||
| Mean (SD) | 14.5 (2.29) | 15.6 (2.62) | 47.8 (9.34) | 52.3 (10.7) | NA (NA) | NA (NA) | 16.8 (5.04) | 16.7 (3.38) |
| Median [Min, Max] | 13.8 [10.7, 30.6] | 14.9 [12.3, 23.4] | 47.3 [12.9, 66.4] | 48.5 [41.6, 75.1] | NA [NA, NA] | NA [NA, NA] | 15.9 [6.00, 73.0] | 15.7 [13.0, 24.9] |
| Missing | 18 (0.2%) | 2 (6.7%) | 2 (3.7%) | 0 (0%) | 54 (100%) | 14 (100%) | 117 (30.5%) | 3 (16.7%) |
| Mean Corpuscular Volume | ||||||||
| Mean (SD) | 89.3 (6.69) | 93.4 (9.45) | 93.4 (4.68) | 93.5 (5.39) | NA (NA) | NA (NA) | 91.4 (6.31) | 91.3 (5.23) |
| Median [Min, Max] | 90.0 [9.00, 123] | 95.0 [77.0, 109] | 93.1 [84.3, 105] | 93.3 [83.8, 103] | NA [NA, NA] | NA [NA, NA] | 91.5 [38.0, 109] | 90.3 [81.4, 99.2] |
| Missing | 13 (0.1%) | 2 (6.7%) | 2 (3.7%) | 0 (0%) | 54 (100%) | 14 (100%) | 35 (9.1%) | 0 (0%) |
| White Blood Cell Count | ||||||||
| Mean (SD) | 7.58 (4.05) | 5.63 (2.51) | 6.55 (2.19) | 6.05 (2.60) | 7.14 (2.81) | 5.94 (1.81) | 5.76 (3.17) | 5.20 (1.90) |
| Median [Min, Max] | 6.80 [0.100, 81.6] | 5.10 [2.10, 12.0] | 6.16 [2.68, 14.8] | 5.21 [3.01, 12.8] | 6.70 [3.20, 19.5] | 5.50 [3.60, 9.40] | 5.10 [0.600, 40.3] | 4.93 [2.30, 9.30] |
| Missing | 17 (0.2%) | 2 (6.7%) | 2 (3.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 35 (9.1%) | 0 (0%) |
| Absolute Neutrophil Count | ||||||||
| Mean (SD) | 5.29 (3.67) | 3.85 (2.18) | 6.22 (11.0) | 3.31 (1.14) | 5.06 (2.63) | 3.72 (1.57) | 4.01 (2.49) | 3.74 (1.54) |
| Median [Min, Max] | 4.50 [0, 77.1] | 3.30 [0.800, 9.10] | 4.16 [1.67, 77.5] | 3.12 [1.40, 5.95] | 4.51 [1.85, 16.6] | 3.20 [1.80, 6.64] | 3.51 [0, 23.0] | 3.36 [1.33, 7.82] |
| Missing | 55 (0.6%) | 2 (6.7%) | 8 (14.8%) | 0 (0%) | 0 (0%) | 0 (0%) | 34 (8.9%) | 0 (0%) |
| Platelet | ||||||||
| Mean (SD) | 262 (109) | 209 (99.4) | 175 (64.2) | 166 (56.8) | 275 (95.6) | 208 (51.6) | 237 (133) | 154 (92.7) |
| Median [Min, Max] | 244 [8.00, 1120] | 193 [10.0, 402] | 170 [22.0, 320] | 150 [103, 281] | 256 [105, 559] | 214 [96.0, 303] | 213 [5.00, 907] | 137 [15.0, 316] |
| Missing | 28 (0.3%) | 2 (6.7%) | 2 (3.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 38 (9.9%) | 0 (0%) |
| Number of CH-myeloid PD mutations | ||||||||
| Negative | 7508 (84.6%) | 16 (53.3%) | 42 (77.8%) | 7 (53.8%) | 54 (100%) | 5 (35.7%) | 292 (76.2%) | 8 (44.4%) |
| 1 | 1117 (12.6%) | 6 (20.0%) | 8 (14.8%) | 3 (23.1%) | 0 (0%) | 5 (35.7%) | 42 (11.0%) | 1 (5.6%) |
| 2 | 191 (2.2%) | 5 (16.7%) | 4 (7.4%) | 0 (0%) | 0 (0%) | 1 (7.1%) | 14 (3.7%) | 1 (5.6%) |
| 3 or more | 55 (0.6%) | 3 (10.0%) | 0 (0%) | 3 (23.1%) | 0 (0%) | 3 (21.4%) | 35 (9.1%) | 8 (44.4%) |
| Maximum VAF CH-myeloid PD | ||||||||
| Negative | 7508 (84.6%) | 16 (53.3%) | 42 (77.8%) | 7 (53.8%) | 54 (100%) | 5 (35.7%) | 292 (76.2%) | 8 (44.4%) |
| 2-5% | 616 (6.9%) | 1 (3.3%) | 5 (9.3%) | 1 (7.7%) | 0 (0%) | 1 (7.1%) | 41 (10.7%) | 4 (22.2%) |
| 5-10% | 345 (3.9%) | 2 (6.7%) | 2 (3.7%) | 2 (15.4%) | 0 (0%) | 2 (14.3%) | 27 (7.0%) | 3 (16.7%) |
| 10-20% | 228 (2.6%) | 5 (16.7%) | 3 (5.6%) | 1 (7.7%) | 0 (0%) | 3 (21.4%) | 8 (2.1%) | 2 (11.1%) |
| >20% | 174 (2.0%) | 6 (20.0%) | 2 (3.7%) | 2 (15.4%) | 0 (0%) | 3 (21.4%) | 15 (3.9%) | 1 (5.6%) |
#Tabulate tMN analysis
print('All tmn risk set samples')
## [1] "All tmn risk set samples"
table(M_tmn_wide_st$post_tmn)
##
## 0 1
## 9362 75
#Tabulate paired tMN analysis
print('All paired samples')
## [1] "All paired samples"
filter(M_tmn_wide_st,has_serial == 1) %>% nrow()
## [1] 35
print('Time to trans')
## [1] "Time to trans"
case <- filter(M_tmn_wide_st,has_serial==1)
psych::describe(case$timelastfu)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 35 864 599 730 786 542 138 2702 2564 1.14 1.08 101
print("Mutation positive at time of CH")
## [1] "Mutation positive at time of CH"
M_tmn_long %>% filter(has_serial == 1 & VAF_1>0) %>% pull(center_id) %>% unique() %>% length()
## [1] 19
print("Multiple mutations at time of CH")
## [1] "Multiple mutations at time of CH"
testing<- M_tmn_long %>% filter(has_serial == 1 & VAF_1>0) %>% group_by(center_id) %>% summarise(mutnum_ch = length(VAF_1>0))
filter(testing,mutnum_ch>1) %>% nrow()
## [1] 13
print("Has a TP53 mutation at tMN")
## [1] "Has a TP53 mutation at tMN"
filter(M_tmn_long,has_serial == 1 & Gene=="TP53" & VAF_2>0) %>% pull(center_id) %>% unique() %>% length()
## [1] 14
print("Has a TP53 mutation and is detectable at CH")
## [1] "Has a TP53 mutation and is detectable at CH"
filter(M_tmn_long,has_serial == 1 & Gene=="TP53" & VAF_1>0) %>% pull(center_id) %>% unique() %>% length()
## [1] 10
print("Among mutation positive, has a TP53 mutation and karyotype abnormal")
## [1] "Among mutation positive, has a TP53 mutation and karyotype abnormal"
filter(M_tmn_long,has_serial == 1 & Gene=="TP53" & M_tmn_long$karyotype_classification=="abnormal") %>% pull(center_id) %>% unique() %>% length()
## [1] 12
#Full CH model only including genes present in all studies
gene_vars = c("TET2","TP53","U2AF1","SF3B1")
mut_vars = c("VAF_nonsilent_r","n_mut_r")
demo_vars = c('age', 'Gender','strata(center)')
D = M_tmn_wide_st
form = paste0(
'Surv(timelastfu, post_tmn) ~ ',
paste(
c(
gene_vars,
mut_vars,
demo_vars
),
collapse = ' + '
)
)
cox <- coxph(formula = as.formula(form), data = D, na.action = 'na.omit')
multi <- cox %>% get_model_data(type="est")
multi2 <- multi %>% mutate(group=case_when(
term %in% gene_vars ~ "Gene",
term=="n_mut_r" ~ "Mutation Number",
term=="VAF_nonsilent_r" ~ "Maximum VAF"
))
multi2 <- multi2 %>% rename(mut_var=term) %>% cbind(study="tMN")
#Main effect of CH
M_tmn_h <- M_tmn_h %>% mutate(center =fct_relevel(center, "PMC", "WSU"))
cox <- coxph(Surv(timelastfu, post_tmn) ~ ch_my_pd + age + Gender + strata(center), data= M_tmn_h, na.action = 'na.omit')
cox
## Call:
## coxph(formula = Surv(timelastfu, post_tmn) ~ ch_my_pd + age +
## Gender + strata(center), data = M_tmn_h, na.action = "na.omit")
##
## coef exp(coef) se(coef) z p
## ch_my_pd 1.910 6.755 0.151 13 <2e-16
## age -0.018 0.982 0.009 -2 0.03
## GenderM 0.005 1.005 0.150 0 0.97
##
## Likelihood ratio test=136 on 3 df, p=<2e-16
## n= 1072, number of events= 186
cox <- coxph(Surv(timelastfu, post_tmn) ~ ch_my_pd*center + age + Gender + strata(center), data=M_tmn_h, na.action = 'na.omit')
cox
## Call:
## coxph(formula = Surv(timelastfu, post_tmn) ~ ch_my_pd * center +
## age + Gender + strata(center), data = M_tmn_h, na.action = "na.omit")
##
## coef exp(coef) se(coef) z p
## ch_my_pd 2.022 7.554 0.166 12.2 <2e-16
## centerWSU NA NA 0.000 NA NA
## age -0.020 0.980 0.009 -2.3 0.02
## GenderM 0.050 1.051 0.152 0.3 0.74
## ch_my_pd:centerWSU -0.718 0.488 0.440 -1.6 0.10
##
## Likelihood ratio test=139 on 4 df, p=<2e-16
## n= 1072, number of events= 186
#Full model with max VAF and mutation number modeled as continuous
D = M_tmn_h
form = paste0(
'Surv(timelastfu, post_tmn) ~ ',
paste(
c(
gene_vars,
mut_vars,
demo_vars
),
collapse = ' + '
)
)
cox <- coxph(formula = as.formula(form), data = D, na.action = 'na.omit')
multi <- cox %>% get_model_data(type="est")
multi2_h <- multi %>% mutate(group=case_when(
term %in% gene_vars ~ "Gene",
term=="n_mut_r" ~ "Mutation Number",
term=="VAF_nonsilent_r" ~ "Maximum VAF"
))
multi2_h <- multi2_h %>% rename(mut_var=term) %>% cbind(study="Primary AML")
combo <- rbind(multi2_h, multi2)
combo <- combo %>% mutate(mut_var2=case_when(mut_var=="VAF_nonsilent_r" ~ "Maximum VAF",
mut_var=="n_mut_r" ~ "Mutation Number",
TRUE ~ as.character(mut_var)))
p <- combo %>%
plot_forest(
x = "mut_var2",
fill ="study",
col = "study",
eb_w = 0,
eb_s = 0.3,
ps = 1.5,
or_s = 2,
nudge = -0.5,
dodge_width = 0.5)+
scale_fill_nejm() +
scale_color_nejm() +
ylab('HR') +
xlab("") +
facet_grid(group ~ ., scales = "free_y", space = "free_y") +
# panel_theme +
theme(
strip.placement = 'top',
strip.text = element_blank(),
axis.text.y = element_text(size = 7),
axis.title = element_text(size = 7),
axis.text.x = element_text(size = 7),
legend.text = element_text(size=7),
legend.title = element_text(size=7)
)
do_plot(p, "supp_figure_10.png", w = 5, h = 4, save_pdf = F) #no pdf